diff --git a/LICENSE b/LICENSE
index 4349934..c973c36 100644
--- a/LICENSE
+++ b/LICENSE
@@ -170,7 +170,7 @@ License. However, in accepting such obligations, You may act only
    on Your own behalf and on Your sole responsibility, not on behalf
    of any other Contributor, and only if You agree to indemnify,
-   defend, and hold each Contributor harmless for any liability
+   defend, and hold each Contributor harmless for any liability
    incurred by, or claims asserted against, such Contributor by reason
    of your accepting any such warranty or additional liability.

@@ -430,4 +430,39 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
-
+
+
+For the org.apache.hive.beeline.ClassNameCompleter class:
+
+Copyright (c) 2002-2006, Marc Prud'hommeaux
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or
+without modification, are permitted provided that the following
+conditions are met:
+
+Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with
+the distribution.
+
+Neither the name of JLine nor the names of its contributors
+may be used to endorse or promote products derived from this
+software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
+BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
+OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file diff --git a/accumulo-handler/src/test/results/positive/accumulo_queries.q.out b/accumulo-handler/src/test/results/positive/accumulo_queries.q.out index 254eeab..8d7f19c 100644 --- a/accumulo-handler/src/test/results/positive/accumulo_queries.q.out +++ b/accumulo-handler/src/test/results/positive/accumulo_queries.q.out @@ -498,39 +498,31 @@ STAGE PLANS: Filter Operator predicate: UDFToDouble(key) is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Group By Operator + aggregations: count(key) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(key) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Map Reduce @@ -556,7 +548,7 @@ STAGE PLANS: key expressions: UDFToDouble(_col0) (type: double) sort order: + Map-reduce partition columns: UDFToDouble(_col0) (type: double) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reduce Operator Tree: Join Operator @@ -566,14 +558,14 @@ STAGE PLANS: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col1} outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), UDFToInteger(_col3) (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE File Output Operator 
compressed: false - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.accumulo.mr.HiveAccumuloTableInputFormat output format: org.apache.hadoop.hive.accumulo.mr.HiveAccumuloTableOutputFormat diff --git a/beeline/src/java/org/apache/hive/beeline/AbstractCommandHandler.java b/beeline/src/java/org/apache/hive/beeline/AbstractCommandHandler.java index a9479d5..625879b 100644 --- a/beeline/src/java/org/apache/hive/beeline/AbstractCommandHandler.java +++ b/beeline/src/java/org/apache/hive/beeline/AbstractCommandHandler.java @@ -26,8 +26,8 @@ import java.util.LinkedList; import java.util.List; -import jline.Completor; -import jline.NullCompletor; +import jline.console.completer.Completer; +import jline.console.completer.NullCompleter; /** * An abstract implementation of CommandHandler. @@ -38,22 +38,22 @@ private final String name; private final String[] names; private final String helpText; - private Completor[] parameterCompletors = new Completor[0]; + private Completer[] parameterCompleters = new Completer[0]; protected transient Throwable lastException; public AbstractCommandHandler(BeeLine beeLine, String[] names, String helpText, - Completor[] completors) { + Completer[] completors) { this.beeLine = beeLine; name = names[0]; this.names = names; this.helpText = helpText; if (completors == null || completors.length == 0) { - parameterCompletors = new Completor[] { new NullCompletor() }; + parameterCompleters = new Completer[] { new NullCompleter() }; } else { - List c = new LinkedList(Arrays.asList(completors)); - c.add(new NullCompletor()); - parameterCompletors = c.toArray(new Completor[0]); + List c = new LinkedList(Arrays.asList(completors)); + c.add(new NullCompleter()); + parameterCompleters = c.toArray(new Completer[0]); } } @@ -94,13 +94,13 @@ public String matches(String line) { return null; } - public void setParameterCompletors(Completor[] parameterCompletors) { - this.parameterCompletors = parameterCompletors; + public void setParameterCompleters(Completer[] parameterCompleters) { + this.parameterCompleters = parameterCompleters; } @Override - public Completor[] getParameterCompletors() { - return parameterCompletors; + public Completer[] getParameterCompleters() { + return parameterCompleters; } @Override diff --git a/beeline/src/java/org/apache/hive/beeline/BeeLine.java b/beeline/src/java/org/apache/hive/beeline/BeeLine.java index 8539a41..630ead4 100644 --- a/beeline/src/java/org/apache/hive/beeline/BeeLine.java +++ b/beeline/src/java/org/apache/hive/beeline/BeeLine.java @@ -72,13 +72,14 @@ import java.util.zip.ZipEntry; import java.util.zip.ZipFile; -import jline.ClassNameCompletor; -import jline.Completor; -import jline.ConsoleReader; -import jline.FileNameCompletor; -import jline.History; -import jline.SimpleCompletor; - +import jline.console.completer.Completer; +import jline.console.completer.StringsCompleter; +import jline.console.completer.FileNameCompleter; +import jline.console.ConsoleReader; +import jline.console.history.History; +import jline.console.history.FileHistory; + +import jline.internal.Log; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.GnuParser; import org.apache.commons.cli.OptionBuilder; @@ -111,7 +112,7 @@ private boolean exit = false; private final DatabaseConnections connections = new DatabaseConnections(); public static final String COMMAND_PREFIX = "!"; - private 
final Completor beeLineCommandCompletor; + private final Completer beeLineCommandCompleter; private Collection drivers = null; private final BeeLineOpts opts = new BeeLineOpts(this, System.getProperties()); private String lastProgress = null; @@ -159,19 +160,19 @@ new ReflectiveCommandHandler(this, new String[] {"quit", "done", "exit"}, null), new ReflectiveCommandHandler(this, new String[] {"connect", "open"}, - new Completor[] {new SimpleCompletor(getConnectionURLExamples())}), + new Completer[] {new StringsCompleter(getConnectionURLExamples())}), new ReflectiveCommandHandler(this, new String[] {"describe"}, - new Completor[] {new TableNameCompletor(this)}), + new Completer[] {new TableNameCompletor(this)}), new ReflectiveCommandHandler(this, new String[] {"indexes"}, - new Completor[] {new TableNameCompletor(this)}), + new Completer[] {new TableNameCompletor(this)}), new ReflectiveCommandHandler(this, new String[] {"primarykeys"}, - new Completor[] {new TableNameCompletor(this)}), + new Completer[] {new TableNameCompletor(this)}), new ReflectiveCommandHandler(this, new String[] {"exportedkeys"}, - new Completor[] {new TableNameCompletor(this)}), + new Completer[] {new TableNameCompletor(this)}), new ReflectiveCommandHandler(this, new String[] {"manual"}, null), new ReflectiveCommandHandler(this, new String[] {"importedkeys"}, - new Completor[] {new TableNameCompletor(this)}), + new Completer[] {new TableNameCompletor(this)}), new ReflectiveCommandHandler(this, new String[] {"procedures"}, null), new ReflectiveCommandHandler(this, new String[] {"tables"}, @@ -179,16 +180,16 @@ new ReflectiveCommandHandler(this, new String[] {"typeinfo"}, null), new ReflectiveCommandHandler(this, new String[] {"columns"}, - new Completor[] {new TableNameCompletor(this)}), + new Completer[] {new TableNameCompletor(this)}), new ReflectiveCommandHandler(this, new String[] {"reconnect"}, null), new ReflectiveCommandHandler(this, new String[] {"dropall"}, - new Completor[] {new TableNameCompletor(this)}), + new Completer[] {new TableNameCompletor(this)}), new ReflectiveCommandHandler(this, new String[] {"history"}, null), new ReflectiveCommandHandler(this, new String[] {"metadata"}, - new Completor[] { - new SimpleCompletor(getMetadataMethodNames())}), + new Completer[] { + new StringsCompleter(getMetadataMethodNames())}), new ReflectiveCommandHandler(this, new String[] {"nativesql"}, null), new ReflectiveCommandHandler(this, new String[] {"dbinfo"}, @@ -198,7 +199,7 @@ new ReflectiveCommandHandler(this, new String[] {"verbose"}, null), new ReflectiveCommandHandler(this, new String[] {"run"}, - new Completor[] {new FileNameCompletor()}), + new Completer[] {new FileNameCompleter()}), new ReflectiveCommandHandler(this, new String[] {"batch"}, null), new ReflectiveCommandHandler(this, new String[] {"list"}, @@ -208,9 +209,9 @@ new ReflectiveCommandHandler(this, new String[] {"go", "#"}, null), new ReflectiveCommandHandler(this, new String[] {"script"}, - new Completor[] {new FileNameCompletor()}), + new Completer[] {new FileNameCompleter()}), new ReflectiveCommandHandler(this, new String[] {"record"}, - new Completor[] {new FileNameCompletor()}), + new Completer[] {new FileNameCompleter()}), new ReflectiveCommandHandler(this, new String[] {"brief"}, null), new ReflectiveCommandHandler(this, new String[] {"close"}, @@ -218,22 +219,22 @@ new ReflectiveCommandHandler(this, new String[] {"closeall"}, null), new ReflectiveCommandHandler(this, new String[] {"isolation"}, - new Completor[] {new 
SimpleCompletor(getIsolationLevels())}), + new Completer[] {new StringsCompleter(getIsolationLevels())}), new ReflectiveCommandHandler(this, new String[] {"outputformat"}, - new Completor[] {new SimpleCompletor( + new Completer[] {new StringsCompleter( formats.keySet().toArray(new String[0]))}), new ReflectiveCommandHandler(this, new String[] {"autocommit"}, null), new ReflectiveCommandHandler(this, new String[] {"commit"}, null), new ReflectiveCommandHandler(this, new String[] {"properties"}, - new Completor[] {new FileNameCompletor()}), + new Completer[] {new FileNameCompleter()}), new ReflectiveCommandHandler(this, new String[] {"rollback"}, null), new ReflectiveCommandHandler(this, new String[] {"help", "?"}, null), new ReflectiveCommandHandler(this, new String[] {"set"}, - getOpts().optionCompletors()), + getOpts().optionCompleters()), new ReflectiveCommandHandler(this, new String[] {"save"}, null), new ReflectiveCommandHandler(this, new String[] {"scan"}, @@ -245,7 +246,7 @@ new ReflectiveCommandHandler(this, new String[] {"call"}, null), new ReflectiveCommandHandler(this, new String[] {"nullemptystring"}, - new Completor[] {new BooleanCompletor()}), + new Completer[] {new BooleanCompleter()}), }; @@ -258,7 +259,7 @@ static { try { - Class.forName("jline.ConsoleReader"); + Class.forName("jline.console.ConsoleReader"); } catch (Throwable t) { throw new ExceptionInInitializerError("jline-missing"); } @@ -469,13 +470,14 @@ public static void mainWithInputRedirection(String[] args, InputStream inputStre int status = beeLine.begin(args, inputStream); if (!Boolean.getBoolean(BeeLineOpts.PROPERTY_NAME_EXIT)) { - System.exit(status); + System.exit(status); } } public BeeLine() { - beeLineCommandCompletor = new BeeLineCommandCompletor(this); + beeLineCommandCompleter = new BeeLineCommandCompleter(BeeLineCommandCompleter.getCompleters + (this)); reflector = new Reflector(this); // attempt to dynamically load signal handler @@ -737,7 +739,7 @@ public int begin(String[] args, InputStream inputStream) throws IOException { ConsoleReader reader = getConsoleReader(inputStream); return execute(reader, false); } finally { - close(); + close(); } } @@ -792,14 +794,16 @@ public void close() { public ConsoleReader getConsoleReader(InputStream inputStream) throws IOException { if (inputStream != null) { // ### NOTE: fix for sf.net bug 879425. - consoleReader = new ConsoleReader(inputStream, new PrintWriter(getOutputStream(), true)); + consoleReader = new ConsoleReader(inputStream, getOutputStream()); } else { consoleReader = new ConsoleReader(); } - // setup history - ByteArrayInputStream historyBuffer = null; + //disable the expandEvents for the purpose of backward compatibility + consoleReader.setExpandEvents(false); + // setup history + ByteArrayOutputStream hist = null; if (new File(getOpts().getHistoryFile()).isFile()) { try { // save the current contents of the history buffer. This gets @@ -808,13 +812,12 @@ public ConsoleReader getConsoleReader(InputStream inputStream) throws IOExceptio // input before the output will cause the previous commands // to not be saved to the buffer. 
FileInputStream historyIn = new FileInputStream(getOpts().getHistoryFile()); - ByteArrayOutputStream hist = new ByteArrayOutputStream(); + hist = new ByteArrayOutputStream(); int n; while ((n = historyIn.read()) != -1) { hist.write(n); } historyIn.close(); - historyBuffer = new ByteArrayInputStream(hist.toByteArray()); } catch (Exception e) { handleException(e); } @@ -822,25 +825,46 @@ public ConsoleReader getConsoleReader(InputStream inputStream) throws IOExceptio try { // now set the output for the history - PrintWriter historyOut = new PrintWriter(new FileWriter(getOpts().getHistoryFile()), true); - consoleReader.getHistory().setOutput(historyOut); + consoleReader.setHistory(new FileHistory(new File(getOpts().getHistoryFile()))); } catch (Exception e) { handleException(e); } if (inputStream instanceof FileInputStream) { - // from script.. no need to load history and no need of completor, either + // from script.. no need to load history and no need of completer, either return consoleReader; } try { // now load in the previous history - if (historyBuffer != null) { - consoleReader.getHistory().load(historyBuffer); + if (hist != null) { + History h = consoleReader.getHistory(); + if (h instanceof FileHistory) { + ((FileHistory) consoleReader.getHistory()).load(new ByteArrayInputStream(hist + .toByteArray())); + } else { + consoleReader.getHistory().add(hist.toString()); + } } } catch (Exception e) { - handleException(e); + handleException(e); } - consoleReader.addCompletor(new BeeLineCompletor(this)); + + // add shutdown hook to flush the history to history file + Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() { + @Override + public void run() { + History h = consoleReader.getHistory(); + if (h instanceof FileHistory) { + try { + ((FileHistory) h).flush(); + } catch (IOException e) { + error(e); + } + } + } + })); + + consoleReader.addCompleter(new BeeLineCompleter(this)); return consoleReader; } @@ -1563,7 +1587,7 @@ private Driver findRegisteredDriver(String url) { if (!knownOnly) { classNames.addAll(Arrays.asList( - ClassNameCompletor.getClassNames())); + ClassNameCompleter.getClassNames())); } classNames.addAll(KNOWN_DRIVERS); @@ -1782,8 +1806,8 @@ DatabaseConnections getDatabaseConnections() { return connections; } - Completor getCommandCompletor() { - return beeLineCommandCompletor; + Completer getCommandCompletor() { + return beeLineCommandCompleter; } public boolean isExit() { diff --git a/beeline/src/java/org/apache/hive/beeline/BeeLineCommandCompleter.java b/beeline/src/java/org/apache/hive/beeline/BeeLineCommandCompleter.java new file mode 100644 index 0000000..6a872bc --- /dev/null +++ b/beeline/src/java/org/apache/hive/beeline/BeeLineCommandCompleter.java @@ -0,0 +1,50 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hive.beeline; + +import java.util.Arrays; +import java.util.LinkedList; +import java.util.List; + +import jline.console.completer.AggregateCompleter; +import jline.console.completer.Completer; +import jline.console.completer.NullCompleter; +import jline.console.completer.StringsCompleter; + +class BeeLineCommandCompleter extends AggregateCompleter { + + public BeeLineCommandCompleter(List completers) { + super(completers); + } + + public static List getCompleters(BeeLine beeLine){ + List completers = new LinkedList(); + + for (int i = 0; i < beeLine.commandHandlers.length; i++) { + String[] cmds = beeLine.commandHandlers[i].getNames(); + for (int j = 0; cmds != null && j < cmds.length; j++) { + List compl = new LinkedList(); + compl.add(new StringsCompleter(BeeLine.COMMAND_PREFIX + cmds[j])); + compl.addAll(Arrays.asList(beeLine.commandHandlers[i].getParameterCompleters())); + compl.add(new NullCompleter()); // last param no complete + completers.add(new AggregateCompleter(compl.toArray(new Completer[0]))); + } + } + return completers; + } +} \ No newline at end of file diff --git a/beeline/src/java/org/apache/hive/beeline/BeeLineCommandCompletor.java b/beeline/src/java/org/apache/hive/beeline/BeeLineCommandCompletor.java deleted file mode 100644 index 52313e6..0000000 --- a/beeline/src/java/org/apache/hive/beeline/BeeLineCommandCompletor.java +++ /dev/null @@ -1,51 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */
-package org.apache.hive.beeline;
-
-import java.util.Arrays;
-import java.util.LinkedList;
-import java.util.List;
-
-import jline.ArgumentCompletor;
-import jline.Completor;
-import jline.MultiCompletor;
-import jline.NullCompletor;
-import jline.SimpleCompletor;
-
-class BeeLineCommandCompletor extends MultiCompletor {
-  private final BeeLine beeLine;
-
-  public BeeLineCommandCompletor(BeeLine beeLine) {
-    this.beeLine = beeLine;
-    List completors = new LinkedList();
-
-    for (int i = 0; i < beeLine.commandHandlers.length; i++) {
-      String[] cmds = beeLine.commandHandlers[i].getNames();
-      for (int j = 0; cmds != null && j < cmds.length; j++) {
-        Completor[] comps = beeLine.commandHandlers[i].getParameterCompletors();
-        List compl = new LinkedList();
-        compl.add(new SimpleCompletor(BeeLine.COMMAND_PREFIX + cmds[j]));
-        compl.addAll(Arrays.asList(comps));
-        compl.add(new NullCompletor()); // last param no complete
-        completors.add(new ArgumentCompletor(
-            compl.toArray(new Completor[0])));
-      }
-    }
-    setCompletors(completors.toArray(new Completor[0]));
-  }
-}
\ No newline at end of file
diff --git a/beeline/src/java/org/apache/hive/beeline/BeeLineCompleter.java b/beeline/src/java/org/apache/hive/beeline/BeeLineCompleter.java
new file mode 100644
index 0000000..87f0150
--- /dev/null
+++ b/beeline/src/java/org/apache/hive/beeline/BeeLineCompleter.java
@@ -0,0 +1,57 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * This source file is based on code taken from SQLLine 1.0.2
+ * See SQLLine notice in LICENSE
+ */
+package org.apache.hive.beeline;
+
+import java.util.List;
+
+import jline.console.completer.Completer;
+
+/**
+ * Completer for BeeLine. It dispatches to sub-completers based on the
+ * current arguments.
+ * + */ +class BeeLineCompleter implements Completer { + private final BeeLine beeLine; + + /** + * @param beeLine + */ + BeeLineCompleter(BeeLine beeLine) { + this.beeLine = beeLine; + } + + public int complete(String buf, int pos, List cand) { + if (buf != null && buf.startsWith(BeeLine.COMMAND_PREFIX) + && !buf.startsWith(BeeLine.COMMAND_PREFIX + "all") + && !buf.startsWith(BeeLine.COMMAND_PREFIX + "sql")) { + return beeLine.getCommandCompletor().complete(buf, pos, cand); + } else { + if (beeLine.getDatabaseConnection() != null && beeLine.getDatabaseConnection().getSQLCompleter() != null) { + return beeLine.getDatabaseConnection().getSQLCompleter().complete(buf, pos, cand); + } else { + return -1; + } + } + } +} \ No newline at end of file diff --git a/beeline/src/java/org/apache/hive/beeline/BeeLineCompletor.java b/beeline/src/java/org/apache/hive/beeline/BeeLineCompletor.java deleted file mode 100644 index c6bb4fe..0000000 --- a/beeline/src/java/org/apache/hive/beeline/BeeLineCompletor.java +++ /dev/null @@ -1,57 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * This source file is based on code taken from SQLLine 1.0.2 - * See SQLLine notice in LICENSE - */ -package org.apache.hive.beeline; - -import java.util.List; - -import jline.Completor; - -/** - * Completor for BeeLine. It dispatches to sub-completors based on the - * current arguments. 
- * - */ -class BeeLineCompletor implements Completor { - private final BeeLine beeLine; - - /** - * @param beeLine - */ - BeeLineCompletor(BeeLine beeLine) { - this.beeLine = beeLine; - } - - public int complete(String buf, int pos, List cand) { - if (buf != null && buf.startsWith(BeeLine.COMMAND_PREFIX) - && !buf.startsWith(BeeLine.COMMAND_PREFIX + "all") - && !buf.startsWith(BeeLine.COMMAND_PREFIX + "sql")) { - return beeLine.getCommandCompletor().complete(buf, pos, cand); - } else { - if (beeLine.getDatabaseConnection() != null && beeLine.getDatabaseConnection().getSQLCompletor() != null) { - return beeLine.getDatabaseConnection().getSQLCompletor().complete(buf, pos, cand); - } else { - return -1; - } - } - } -} \ No newline at end of file diff --git a/beeline/src/java/org/apache/hive/beeline/BeeLineOpts.java b/beeline/src/java/org/apache/hive/beeline/BeeLineOpts.java index f73fb44..649bb63 100644 --- a/beeline/src/java/org/apache/hive/beeline/BeeLineOpts.java +++ b/beeline/src/java/org/apache/hive/beeline/BeeLineOpts.java @@ -38,12 +38,12 @@ import java.util.Properties; import java.util.TreeSet; -import jline.Completor; -import jline.SimpleCompletor; import jline.Terminal; +import jline.TerminalFactory; +import jline.console.completer.Completer; +import jline.console.completer.StringsCompleter; - -class BeeLineOpts implements Completor { +class BeeLineOpts implements Completer { public static final int DEFAULT_MAX_WIDTH = 80; public static final int DEFAULT_MAX_HEIGHT = 80; public static final int DEFAULT_HEADER_INTERVAL = 100; @@ -70,7 +70,7 @@ private boolean showNestedErrs = false; private boolean showElapsedTime = true; private String numberFormat = "default"; - private final Terminal terminal = Terminal.setupTerminal(); + private final Terminal terminal = TerminalFactory.get(); private int maxWidth = DEFAULT_MAX_WIDTH; private int maxHeight = DEFAULT_MAX_HEIGHT; private int maxColumnWidth = 15; @@ -98,18 +98,17 @@ public BeeLineOpts(BeeLine beeLine, Properties props) { this.beeLine = beeLine; - if (terminal.getTerminalWidth() > 0) { - maxWidth = terminal.getTerminalWidth(); + if (terminal.getWidth() > 0) { + maxWidth = terminal.getWidth(); } - if (terminal.getTerminalHeight() > 0) { - maxHeight = terminal.getTerminalHeight(); + if (terminal.getHeight() > 0) { + maxHeight = terminal.getHeight(); } loadProperties(props); } - - public Completor[] optionCompletors() { - return new Completor[] {this}; + public Completer[] optionCompleters() { + return new Completer[] {this}; } public String[] possibleSettingValues() { @@ -144,7 +143,7 @@ public File saveDir() { @Override public int complete(String buf, int pos, List cand) { try { - return new SimpleCompletor(propertyNames()).complete(buf, pos, cand); + return new StringsCompleter(propertyNames()).complete(buf, pos, cand); } catch (Exception e) { beeLine.handleException(e); return -1; diff --git a/beeline/src/java/org/apache/hive/beeline/BooleanCompleter.java b/beeline/src/java/org/apache/hive/beeline/BooleanCompleter.java new file mode 100644 index 0000000..47fe70e --- /dev/null +++ b/beeline/src/java/org/apache/hive/beeline/BooleanCompleter.java @@ -0,0 +1,31 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.beeline;
+
+import jline.console.completer.StringsCompleter;
+
+/**
+ * JLine completer for boolean values (true/false)
+ */
+class BooleanCompleter extends StringsCompleter {
+
+  public BooleanCompleter(){
+    super(new String[] {"true", "false"});
+  }
+
+}
\ No newline at end of file
diff --git a/beeline/src/java/org/apache/hive/beeline/BooleanCompletor.java b/beeline/src/java/org/apache/hive/beeline/BooleanCompletor.java
deleted file mode 100644
index 3e88c53..0000000
--- a/beeline/src/java/org/apache/hive/beeline/BooleanCompletor.java
+++ /dev/null
@@ -1,36 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-package org.apache.hive.beeline;
-
-import java.util.List;
-
-import jline.Completor;
-import jline.SimpleCompletor;
-
-/**
- * JLine completor boolean value (true/false)
- */
-class BooleanCompletor extends SimpleCompletor {
-
-  public BooleanCompletor(){
-    super(new String[] {"true", "false"});
-  }
-
-}
\ No newline at end of file
diff --git a/beeline/src/java/org/apache/hive/beeline/ClassNameCompleter.java b/beeline/src/java/org/apache/hive/beeline/ClassNameCompleter.java
new file mode 100644
index 0000000..065eab4
--- /dev/null
+++ b/beeline/src/java/org/apache/hive/beeline/ClassNameCompleter.java
@@ -0,0 +1,184 @@
+/**
+ * Copyright (c) 2002-2006, Marc Prud'hommeaux
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with
+ * the distribution.
+ *
+ * Neither the name of JLine nor the names of its contributors
+ * may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+ * EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
+ * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.beeline;
+
+import jline.console.completer.StringsCompleter;
+
+import java.io.File;
+import java.io.IOException;
+import java.net.JarURLConnection;
+import java.net.URL;
+import java.net.URLClassLoader;
+import java.net.URLConnection;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Set;
+import java.util.jar.JarEntry;
+import java.util.jar.JarFile;
+import java.util.Enumeration;
+import java.util.TreeSet;
+
+/**
+ * This completer was originally provided in JLine 0.9.94 and was removed in
+ * JLine 2.12; the previous implementation is restored here for BeeLine's use.
+ */
+public class ClassNameCompleter extends StringsCompleter {
+
+  public ClassNameCompleter(String... candidates) {
+    super(candidates);
+  }
+
+  public static String[] getClassNames() throws IOException {
+    Set urls = new HashSet();
+
+    for (ClassLoader loader = ClassNameCompleter.class.getClassLoader(); loader != null;
+         loader = loader.getParent()) {
+      if (!(loader instanceof URLClassLoader)) {
+        continue;
+      }
+
+      urls.addAll(Arrays.asList(((URLClassLoader) loader).getURLs()));
+    }
+
+    // Now add the URL that holds java.lang.String. This is because
+    // some JVMs do not report the core classes jar in the list of
+    // class loaders.
+ Class[] systemClasses = new Class[]{String.class, javax.swing.JFrame.class}; + + for (int i = 0; i < systemClasses.length; i++) { + URL classURL = systemClasses[i] + .getResource("/" + systemClasses[i].getName().replace('.', '/') + ".class"); + + if (classURL != null) { + URLConnection uc = classURL.openConnection(); + + if (uc instanceof JarURLConnection) { + urls.add(((JarURLConnection) uc).getJarFileURL()); + } + } + } + + Set classes = new HashSet(); + + for (Iterator i = urls.iterator(); i.hasNext(); ) { + URL url = (URL) i.next(); + File file = new File(url.getFile()); + + if (file.isDirectory()) { + Set files = getClassFiles(file.getAbsolutePath(), new HashSet(), file, new int[]{200}); + classes.addAll(files); + + continue; + } + + if ((file == null) || !file.isFile()) { + continue; + } + + JarFile jf = new JarFile(file); + + for (Enumeration e = jf.entries(); e.hasMoreElements(); ) { + JarEntry entry = (JarEntry) e.nextElement(); + + if (entry == null) { + continue; + } + + String name = entry.getName(); + + if (!name.endsWith(".class")) { + /* only use class file*/ + continue; + } + + classes.add(name); + } + } + + // now filter classes by changing "/" to "." and trimming the + // trailing ".class" + Set classNames = new TreeSet(); + + for (Iterator i = classes.iterator(); i.hasNext(); ) { + String name = (String) i.next(); + classNames.add(name.replace('/', '.'). + substring(0, name.length() - 6)); + } + + return (String[]) classNames.toArray(new String[classNames.size()]); + } + + private static Set getClassFiles(String root, Set holder, File directory, int[] maxDirectories) { + // we have passed the maximum number of directories to scan + if (maxDirectories[0]-- < 0) { + return holder; + } + + File[] files = directory.listFiles(); + + for (int i = 0; (files != null) && (i < files.length); i++) { + String name = files[i].getAbsolutePath(); + + if (!(name.startsWith(root))) { + continue; + } else if (files[i].isDirectory()) { + getClassFiles(root, holder, files[i], maxDirectories); + } else if (files[i].getName().endsWith(".class")) { + holder.add(files[i].getAbsolutePath(). + substring(root.length() + 1)); + } + } + + return holder; + } +} diff --git a/beeline/src/java/org/apache/hive/beeline/CommandHandler.java b/beeline/src/java/org/apache/hive/beeline/CommandHandler.java index bab1778..e0080df 100644 --- a/beeline/src/java/org/apache/hive/beeline/CommandHandler.java +++ b/beeline/src/java/org/apache/hive/beeline/CommandHandler.java @@ -22,7 +22,7 @@ */ package org.apache.hive.beeline; -import jline.Completor; +import jline.console.completer.Completer; /** * A generic command to be executed. Execution of the command @@ -73,7 +73,7 @@ /** * Returns the completors that can handle parameters. 
   */
-  public Completor[] getParameterCompletors();
+  public Completer[] getParameterCompleters();

   /**
    * Returns exception thrown for last command
diff --git a/beeline/src/java/org/apache/hive/beeline/Commands.java b/beeline/src/java/org/apache/hive/beeline/Commands.java
index 61af793..def26b6 100644
--- a/beeline/src/java/org/apache/hive/beeline/Commands.java
+++ b/beeline/src/java/org/apache/hive/beeline/Commands.java
@@ -128,11 +128,12 @@ public boolean metadata(String cmd, String[] args) {

   public boolean history(String line) {
-    List hist = beeLine.getConsoleReader().getHistory().getHistoryList();
+    Iterator hist = beeLine.getConsoleReader().getHistory().entries();
     int index = 1;
-    for (Iterator i = hist.iterator(); i.hasNext(); index++) {
+    while (hist.hasNext()) {
       beeLine.output(beeLine.getColorBuffer().pad(index + ".", 6)
-          .append(i.next().toString()));
+          .append(hist.next().toString()));
+      index++;
     }
     return true;
   }
diff --git a/beeline/src/java/org/apache/hive/beeline/DatabaseConnection.java b/beeline/src/java/org/apache/hive/beeline/DatabaseConnection.java
index ab67700..8ba0232 100644
--- a/beeline/src/java/org/apache/hive/beeline/DatabaseConnection.java
+++ b/beeline/src/java/org/apache/hive/beeline/DatabaseConnection.java
@@ -35,8 +35,8 @@ import java.util.Set;
 import java.util.TreeSet;

-import jline.ArgumentCompletor;
-import jline.Completor;
+import jline.console.completer.ArgumentCompleter;
+import jline.console.completer.Completer;

 class DatabaseConnection {
   private static final String HIVE_AUTH_USER = "user";
@@ -51,7 +51,7 @@
   private final String url;
   private final Properties info;
   private Schema schema = null;
-  private Completor sqlCompletor = null;
+  private Completer sqlCompleter = null;

   public boolean isClosed() {
     return (null == connection);
@@ -76,17 +76,16 @@ void setCompletions(boolean skipmeta) throws SQLException, IOException {
         getDatabaseMetaData() == null || getDatabaseMetaData().getExtraNameCharacters() == null ? ""
             : getDatabaseMetaData().getExtraNameCharacters();

-    // setup the completor for the database
-    sqlCompletor = new ArgumentCompletor(
-        new SQLCompletor(beeLine, skipmeta),
-        new ArgumentCompletor.AbstractArgumentDelimiter() {
+    // setup the completer for the database
+    sqlCompleter = new ArgumentCompleter(
+        new ArgumentCompleter.AbstractArgumentDelimiter() {
          // delimiters for SQL statements are any
          // non-letter-or-number characters, except
          // underscore and characters that are specified
          // by the database to be valid name identifiers.
          @Override
-          public boolean isDelimiterChar(String buf, int pos) {
-            char c = buf.charAt(pos);
+          public boolean isDelimiterChar(CharSequence buffer, int pos) {
+            char c = buffer.charAt(pos);
             if (Character.isWhitespace(c)) {
               return true;
             }
@@ -94,10 +93,10 @@ public boolean isDelimiterChar(String buf, int pos) {
                 && c != '_'
                 && extraNameCharacters.indexOf(c) == -1;
           }
-        });
-
+        },
+        new SQLCompleter(SQLCompleter.getSQLCompleters(beeLine, skipmeta)));
     // not all argument elements need to hold true
-    ((ArgumentCompletor) sqlCompletor).setStrict(false);
+    ((ArgumentCompleter) sqlCompleter).setStrict(false);
   }

@@ -236,8 +235,8 @@ String getUrl() {
     return url;
   }

-  Completor getSQLCompletor() {
-    return sqlCompletor;
+  Completer getSQLCompleter() {
+    return sqlCompleter;
   }

   class Schema {
diff --git a/beeline/src/java/org/apache/hive/beeline/ReflectiveCommandHandler.java b/beeline/src/java/org/apache/hive/beeline/ReflectiveCommandHandler.java
index 2b957f2..3b863ae 100644
--- a/beeline/src/java/org/apache/hive/beeline/ReflectiveCommandHandler.java
+++ b/beeline/src/java/org/apache/hive/beeline/ReflectiveCommandHandler.java
@@ -22,7 +22,7 @@
  */
 package org.apache.hive.beeline;

-import jline.Completor;
+import jline.console.completer.Completer;

 import org.apache.hadoop.fs.shell.Command;

@@ -34,8 +34,15 @@ public class ReflectiveCommandHandler extends AbstractCommandHandler {
   private final BeeLine beeLine;

-  public ReflectiveCommandHandler(BeeLine beeLine, String[] cmds, Completor[] completor) {
-    super(beeLine, cmds, beeLine.loc("help-" + cmds[0]), completor);
+  /**
+   * @param beeLine
+   * @param cmds an array of alternative names for the same command; the
+   *             first one is always chosen for display purposes and to look up
+   *             help documentation in the BeeLine.properties file.
+   * @param completer
+   */
+  public ReflectiveCommandHandler(BeeLine beeLine, String[] cmds, Completer[] completer) {
+    super(beeLine, cmds, beeLine.loc("help-" + cmds[0]), completer);
     this.beeLine = beeLine;
   }
diff --git a/beeline/src/java/org/apache/hive/beeline/SQLCompleter.java b/beeline/src/java/org/apache/hive/beeline/SQLCompleter.java
new file mode 100644
index 0000000..56bf6e1
--- /dev/null
+++ b/beeline/src/java/org/apache/hive/beeline/SQLCompleter.java
@@ -0,0 +1,104 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * This source file is based on code taken from SQLLine 1.0.2
+ * See SQLLine notice in LICENSE
+ */
+package org.apache.hive.beeline;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.sql.SQLException;
+import java.util.Set;
+import java.util.StringTokenizer;
+import java.util.TreeSet;
+
+import jline.console.completer.StringsCompleter;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+class SQLCompleter extends StringsCompleter {
+  private static final Log LOG = LogFactory.getLog(SQLCompleter.class.getName());
+
+
+  public SQLCompleter(Set<String> completions){
+    super(completions);
+  }
+
+  public static Set<String> getSQLCompleters(BeeLine beeLine, boolean skipmeta)
+      throws IOException, SQLException {
+    Set<String> completions = new TreeSet<String>();
+
+    // add the default SQL completions
+    String keywords = new BufferedReader(new InputStreamReader(
+        SQLCompleter.class.getResourceAsStream(
+            "/sql-keywords.properties"))).readLine();
+
+    // now add the keywords from the current connection
+    try {
+      keywords += "," + beeLine.getDatabaseConnection().getDatabaseMetaData().getSQLKeywords();
+    } catch (Exception e) {
+      LOG.debug("failed to get SQL keywords from database metadata due to the exception: " + e, e);
+    }
+    try {
+      keywords += "," + beeLine.getDatabaseConnection().getDatabaseMetaData().getStringFunctions();
+    } catch (Exception e) {
+      LOG.debug(
+          "failed to get string function names from database metadata due to the exception: " + e, e);
+    }
+    try {
+      keywords += "," + beeLine.getDatabaseConnection().getDatabaseMetaData().getNumericFunctions();
+    } catch (Exception e) {
+      LOG.debug(
+          "failed to get numeric function names from database metadata due to the exception: " + e, e);
+    }
+    try {
+      keywords += "," + beeLine.getDatabaseConnection().getDatabaseMetaData().getSystemFunctions();
+    } catch (Exception e) {
+      LOG.debug(
+          "failed to get system function names from database metadata due to the exception: " + e, e);
+    }
+    try {
+      keywords += "," + beeLine.getDatabaseConnection().getDatabaseMetaData().getTimeDateFunctions();
+    } catch (Exception e) {
+      LOG.debug(
+          "failed to get time/date function names from database metadata due to the exception: " + e,
+          e);
+    }
+
+    // also allow lower-case versions of all the keywords
+    keywords += "," + keywords.toLowerCase();
+
+    StringTokenizer tok = new StringTokenizer(keywords, ", ");
+    while (tok.hasMoreTokens()) {
+      completions.add(tok.nextToken());
+    }
+
+    // now add the tables and columns from the current connection
+    if (!(skipmeta)) {
+      String[] columns = beeLine.getColumnNames(beeLine.getDatabaseConnection().getDatabaseMetaData());
+      for (int i = 0; columns != null && i < columns.length; i++) {
+        completions.add(columns[i]);
+      }
+    }
+
+    return completions;
+  }
+}
diff --git a/beeline/src/java/org/apache/hive/beeline/SQLCompletor.java b/beeline/src/java/org/apache/hive/beeline/SQLCompletor.java
deleted file mode 100644
index 844b9ae..0000000
--- a/beeline/src/java/org/apache/hive/beeline/SQLCompletor.java
+++ /dev/null
@@ -1,91 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * This source file is based on code taken from SQLLine 1.0.2 - * See SQLLine notice in LICENSE - */ -package org.apache.hive.beeline; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStreamReader; -import java.sql.SQLException; -import java.util.Set; -import java.util.StringTokenizer; -import java.util.TreeSet; - -import jline.SimpleCompletor; - -class SQLCompletor extends SimpleCompletor { - private final BeeLine beeLine; - - public SQLCompletor(BeeLine beeLine, boolean skipmeta) - throws IOException, SQLException { - super(new String[0]); - this.beeLine = beeLine; - - Set completions = new TreeSet(); - - // add the default SQL completions - String keywords = new BufferedReader(new InputStreamReader( - SQLCompletor.class.getResourceAsStream( - "/sql-keywords.properties"))).readLine(); - - // now add the keywords from the current connection - try { - keywords += "," + beeLine.getDatabaseConnection().getDatabaseMetaData().getSQLKeywords(); - } catch (Throwable t) { - } - try { - keywords += "," + beeLine.getDatabaseConnection().getDatabaseMetaData().getStringFunctions(); - } catch (Throwable t) { - } - try { - keywords += "," + beeLine.getDatabaseConnection().getDatabaseMetaData().getNumericFunctions(); - } catch (Throwable t) { - } - try { - keywords += "," + beeLine.getDatabaseConnection().getDatabaseMetaData().getSystemFunctions(); - } catch (Throwable t) { - } - try { - keywords += "," + beeLine.getDatabaseConnection().getDatabaseMetaData().getTimeDateFunctions(); - } catch (Throwable t) { - } - - // also allow lower-case versions of all the keywords - keywords += "," + keywords.toLowerCase(); - - for (StringTokenizer tok = new StringTokenizer(keywords, ", "); tok.hasMoreTokens(); completions - .add(tok.nextToken())) { - ; - } - - // now add the tables and columns from the current connection - if (!(skipmeta)) { - String[] columns = beeLine.getColumnNames(beeLine.getDatabaseConnection().getDatabaseMetaData()); - for (int i = 0; columns != null && i < columns.length; i++) { - completions.add(columns[i++]); - } - } - - // set the Strings that will be completed - setCandidateStrings(completions.toArray(new String[0])); - } -} diff --git a/beeline/src/java/org/apache/hive/beeline/TableNameCompletor.java b/beeline/src/java/org/apache/hive/beeline/TableNameCompletor.java index bc0d9be..663db28 100644 --- a/beeline/src/java/org/apache/hive/beeline/TableNameCompletor.java +++ b/beeline/src/java/org/apache/hive/beeline/TableNameCompletor.java @@ -24,10 +24,10 @@ import java.util.List; -import jline.Completor; -import jline.SimpleCompletor; +import jline.console.completer.Completer; +import jline.console.completer.StringsCompleter; -class TableNameCompletor implements Completor { +class TableNameCompletor implements Completer { private final BeeLine beeLine; /** @@ -41,7 +41,7 @@ public int complete(String buf, int pos, List cand) { if (beeLine.getDatabaseConnection() == null) { return -1; } - return new SimpleCompletor(beeLine.getDatabaseConnection().getTableNames(true)) + return new 
StringsCompleter(beeLine.getDatabaseConnection().getTableNames(true)) .complete(buf, pos, cand); } } \ No newline at end of file diff --git a/beeline/src/test/org/apache/hive/beeline/ProxyAuthTest.java b/beeline/src/test/org/apache/hive/beeline/ProxyAuthTest.java index 95146e9..f6bb674 100644 --- a/beeline/src/test/org/apache/hive/beeline/ProxyAuthTest.java +++ b/beeline/src/test/org/apache/hive/beeline/ProxyAuthTest.java @@ -31,6 +31,7 @@ import org.apache.hive.jdbc.HiveConnection; import org.apache.hive.beeline.BeeLine; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.Utils; import org.apache.hive.service.auth.HiveAuthFactory; /** @@ -201,7 +202,7 @@ public static void main(String[] args) throws Exception { } private static void storeTokenInJobConf(String tokenStr) throws Exception { - ShimLoader.getHadoopShims().setTokenStr(ShimLoader.getHadoopShims().getUGIForConf(new Configuration()), + Utils.setTokenStr(Utils.getUGIForConf(new Configuration()), tokenStr, HiveAuthFactory.HS2_CLIENT_TOKEN); System.out.println("Stored token " + tokenStr); } diff --git a/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java b/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java index 0ccaacb..aec5018 100644 --- a/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java +++ b/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java @@ -29,17 +29,16 @@ import java.io.UnsupportedEncodingException; import java.sql.SQLException; import java.util.ArrayList; -import java.util.Arrays; import java.util.List; import java.util.Map; -import jline.ArgumentCompletor; -import jline.ArgumentCompletor.AbstractArgumentDelimiter; -import jline.ArgumentCompletor.ArgumentDelimiter; -import jline.Completor; -import jline.ConsoleReader; -import jline.History; -import jline.SimpleCompletor; +import jline.console.ConsoleReader; +import jline.console.completer.Completer; +import jline.console.history.FileHistory; +import jline.console.completer.StringsCompleter; +import jline.console.completer.ArgumentCompleter; +import jline.console.completer.ArgumentCompleter.ArgumentDelimiter; +import jline.console.completer.ArgumentCompleter.AbstractArgumentDelimiter; import org.apache.commons.lang.StringUtils; import org.apache.commons.logging.Log; @@ -58,7 +57,6 @@ import org.apache.hadoop.hive.ql.CommandNeedRetryException; import org.apache.hadoop.hive.ql.Driver; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; -import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.exec.mr.HadoopJobExecHelper; import org.apache.hadoop.hive.ql.exec.tez.TezJobExecHelper; import org.apache.hadoop.hive.ql.parse.HiveParser; @@ -474,59 +472,61 @@ public void processSelectDatabase(CliSessionState ss) throws IOException { } } - public static Completor[] getCommandCompletor () { - // SimpleCompletor matches against a pre-defined wordlist + public static Completer[] getCommandCompleter() { + // StringsCompleter matches against a pre-defined wordlist // We start with an empty wordlist and build it up - SimpleCompletor sc = new SimpleCompletor(new String[0]); + List candidateStrings = new ArrayList(); // We add Hive function names // For functions that aren't infix operators, we add an open // parenthesis at the end. 
for (String s : FunctionRegistry.getFunctionNames()) { if (s.matches("[a-z_]+")) { - sc.addCandidateString(s + "("); + candidateStrings.add(s + "("); } else { - sc.addCandidateString(s); + candidateStrings.add(s); } } // We add Hive keywords, including lower-cased versions for (String s : HiveParser.getKeywords()) { - sc.addCandidateString(s); - sc.addCandidateString(s.toLowerCase()); + candidateStrings.add(s); + candidateStrings.add(s.toLowerCase()); } + StringsCompleter strCompleter = new StringsCompleter(candidateStrings); + // Because we use parentheses in addition to whitespace // as a keyword delimiter, we need to define a new ArgumentDelimiter // that recognizes parenthesis as a delimiter. - ArgumentDelimiter delim = new AbstractArgumentDelimiter () { + ArgumentDelimiter delim = new AbstractArgumentDelimiter() { @Override - public boolean isDelimiterChar (String buffer, int pos) { + public boolean isDelimiterChar(CharSequence buffer, int pos) { char c = buffer.charAt(pos); return (Character.isWhitespace(c) || c == '(' || c == ')' || - c == '[' || c == ']'); + c == '[' || c == ']'); } }; // The ArgumentCompletor allows us to match multiple tokens // in the same line. - final ArgumentCompletor ac = new ArgumentCompletor(sc, delim); + final ArgumentCompleter argCompleter = new ArgumentCompleter(delim, strCompleter); // By default ArgumentCompletor is in "strict" mode meaning // a token is only auto-completed if all prior tokens // match. We don't want that since there are valid tokens // that are not in our wordlist (eg. table and column names) - ac.setStrict(false); + argCompleter.setStrict(false); // ArgumentCompletor always adds a space after a matched token. // This is undesirable for function names because a space after // the opening parenthesis is unnecessary (and uncommon) in Hive. // We stack a custom Completor on top of our ArgumentCompletor // to reverse this. - Completor completor = new Completor () { + Completer customCompletor = new Completer () { @Override public int complete (String buffer, int offset, List completions) { List comp = completions; - int ret = ac.complete(buffer, offset, completions); + int ret = argCompleter.complete(buffer, offset, completions); // ConsoleReader will do the substitution if and only if there // is exactly one valid completion, so we ignore other cases. if (completions.size() == 1) { @@ -543,16 +543,24 @@ public int complete (String buffer, int offset, List completions) { for (int i = 0; i < vars.length; i++) { vars[i] = confs[i].varname; } - SimpleCompletor conf = new SimpleCompletor(vars); - conf.setDelimiter("."); + ArgumentCompleter.ArgumentDelimiter delimiter = new ArgumentCompleter.AbstractArgumentDelimiter() { + @Override + public boolean isDelimiterChar(CharSequence buffer, int pos) { + char c = buffer.charAt(pos); + return c == '.'; + } + }; - SimpleCompletor set = new SimpleCompletor("set") { + StringsCompleter setCompleter = new StringsCompleter("set") { @Override public int complete(String buffer, int cursor, List clist) { return buffer != null && buffer.equals("set") ? 
super.complete(buffer, cursor, clist) : -1; } }; - ArgumentCompletor propCompletor = new ArgumentCompletor(new Completor[]{set, conf}) { + + ArgumentCompleter confCompleter = new ArgumentCompleter(delimiter, setCompleter); + + ArgumentCompleter propCompleter = new ArgumentCompleter(new Completer[]{setCompleter, confCompleter}) { @Override @SuppressWarnings("unchecked") public int complete(String buffer, int offset, List completions) { @@ -563,7 +571,7 @@ public int complete(String buffer, int offset, List completions) { return ret; } }; - return new Completor[] {propCompletor, completor}; + return new Completer[] {propCompleter, customCompletor}; } public static void main(String[] args) throws Exception { @@ -675,8 +683,8 @@ private int executeDriver(CliSessionState ss, HiveConf conf, OptionsProcessor o ConsoleReader reader = getConsoleReader(); reader.setBellEnabled(false); // reader.setDebug(new PrintWriter(new FileWriter("writer.debug", true))); - for (Completor completor : getCommandCompletor()) { - reader.addCompletor(completor); + for (Completer completer : getCommandCompleter()) { + reader.addCompleter(completer); } String line; @@ -685,7 +693,7 @@ private int executeDriver(CliSessionState ss, HiveConf conf, OptionsProcessor o try { if ((new File(historyDirectory)).exists()) { String historyFile = historyDirectory + File.separator + HISTORYFILE; - reader.setHistory(new History(new File(historyFile))); + reader.setHistory(new FileHistory(new File(historyFile))); } else { System.err.println("WARNING: Directory for Hive history file: " + historyDirectory + " does not exist. History will not be available during this session."); diff --git a/cli/src/test/org/apache/hadoop/hive/cli/TestCliDriverMethods.java b/cli/src/test/org/apache/hadoop/hive/cli/TestCliDriverMethods.java index 88a37d5..55607f3 100644 --- a/cli/src/test/org/apache/hadoop/hive/cli/TestCliDriverMethods.java +++ b/cli/src/test/org/apache/hadoop/hive/cli/TestCliDriverMethods.java @@ -42,9 +42,9 @@ import java.util.List; import java.util.Map; -import jline.ArgumentCompletor; -import jline.Completor; -import jline.ConsoleReader; +import jline.console.ConsoleReader; +import jline.console.completer.ArgumentCompleter; +import jline.console.completer.Completer; import junit.framework.TestCase; import org.apache.commons.io.FileUtils; @@ -141,20 +141,20 @@ private PrintStream headerPrintingTestDriver(Schema mockSchema) throws CommandNe public void testGetCommandCompletor() { - Completor[] completors = CliDriver.getCommandCompletor(); + Completer[] completors = CliDriver.getCommandCompleter(); assertEquals(2, completors.length); - assertTrue(completors[0] instanceof ArgumentCompletor); - assertTrue(completors[1] instanceof Completor); + assertTrue(completors[0] instanceof ArgumentCompleter); + assertTrue(completors[1] instanceof Completer); //completer adds space after last delimiter - List<String> testList = new ArrayList<String>(Arrays.asList(new String[]{")"})); + List<CharSequence> testList = new ArrayList<CharSequence>(Arrays.asList(new String[]{")"})); completors[1].complete("fdsdfsdf", 0, testList); assertEquals(") ", testList.get(0)); - testList = new ArrayList<String>(); + testList = new ArrayList<CharSequence>(); completors[1].complete("len", 0, testList); - assertTrue(testList.get(0).endsWith("length(")); + assertTrue(testList.get(0).toString().endsWith("length(")); - testList = new ArrayList<String>(); + testList = new ArrayList<CharSequence>(); completors[0].complete("set f", 0, testList); assertEquals("set", testList.get(0)); diff --git a/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
b/common/src/java/org/apache/hadoop/hive/common/FileUtils.java index 95e8d7c..2431e1a 100644 --- a/common/src/java/org/apache/hadoop/hive/common/FileUtils.java +++ b/common/src/java/org/apache/hadoop/hive/common/FileUtils.java @@ -30,6 +30,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.DefaultFileAccess; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; @@ -41,6 +42,7 @@ import org.apache.hadoop.hive.shims.HadoopShims; import org.apache.hadoop.hive.shims.HadoopShims.HdfsFileStatus; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.Shell; @@ -373,8 +375,8 @@ public static Path getPathOrParentThatExists(FileSystem fs, Path path) throws IO public static void checkFileAccessWithImpersonation(final FileSystem fs, final FileStatus stat, final FsAction action, final String user) throws IOException, AccessControlException, InterruptedException, Exception { - UserGroupInformation ugi = ShimLoader.getHadoopShims().getUGIForConf(fs.getConf()); - String currentUser = ShimLoader.getHadoopShims().getShortUserName(ugi); + UserGroupInformation ugi = Utils.getUGIForConf(fs.getConf()); + String currentUser = ugi.getShortUserName(); if (user == null || currentUser.equals(user)) { // No need to impersonate user, do the checks as the currently configured user. @@ -383,8 +385,9 @@ public static void checkFileAccessWithImpersonation(final FileSystem fs, } // Otherwise, try user impersonation. Current user must be configured to do user impersonation. - UserGroupInformation proxyUser = ShimLoader.getHadoopShims().createProxyUser(user); - ShimLoader.getHadoopShims().doAs(proxyUser, new PrivilegedExceptionAction() { + UserGroupInformation proxyUser = UserGroupInformation.createProxyUser( + user, UserGroupInformation.getLoginUser()); + proxyUser.doAs(new PrivilegedExceptionAction() { @Override public Object run() throws Exception { FileSystem fsAsUser = FileSystem.get(fs.getUri(), fs.getConf()); diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 46b56bb..22f052a 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -48,6 +48,7 @@ import org.apache.hadoop.hive.conf.Validator.StringSet; import org.apache.hadoop.hive.conf.Validator.TimeValidator; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.Shell; @@ -384,9 +385,9 @@ public void setSparkConfigUpdated(boolean isSparkConfigUpdated) { METASTORECONNECTURLKEY("javax.jdo.option.ConnectionURL", "jdbc:derby:;databaseName=metastore_db;create=true", "JDBC connect string for a JDBC metastore"), - HMSHANDLERATTEMPTS("hive.hmshandler.retry.attempts", 1, + HMSHANDLERATTEMPTS("hive.hmshandler.retry.attempts", 10, "The number of times to retry a HMSHandler call if there were a connection error."), - HMSHANDLERINTERVAL("hive.hmshandler.retry.interval", "1000ms", + HMSHANDLERINTERVAL("hive.hmshandler.retry.interval", "2000ms", new TimeValidator(TimeUnit.MILLISECONDS), "The time between HMSHandler retry attempts on failure."), 
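The two HMSHandler entries just above retune metastore retry behavior: hive.hmshandler.retry.attempts goes from 1 to 10 and hive.hmshandler.retry.interval from 1000ms to 2000ms, with the interval now checked by a TimeValidator. A hedged sketch of how a caller would consume the pair, assuming the getIntVar/getTimeVar accessors that this branch's HiveConf provides; the retry wrapper itself is invented for illustration:

    import java.util.concurrent.Callable;
    import java.util.concurrent.TimeUnit;

    import org.apache.hadoop.hive.conf.HiveConf;
    import org.apache.hadoop.hive.conf.HiveConf.ConfVars;

    public class HmsRetrySketch {
      // Retry a metastore call the way the settings describe: up to
      // hive.hmshandler.retry.attempts tries, sleeping
      // hive.hmshandler.retry.interval between failures.
      static <T> T withRetries(HiveConf conf, Callable<T> call) throws Exception {
        int attempts = conf.getIntVar(ConfVars.HMSHANDLERATTEMPTS);
        long intervalMs = conf.getTimeVar(ConfVars.HMSHANDLERINTERVAL, TimeUnit.MILLISECONDS);
        Exception lastError = null;
        for (int i = 0; i < attempts; i++) {
          try {
            return call.call();
          } catch (Exception e) {
            lastError = e; // treat as a connection-level error; wait and retry
            Thread.sleep(intervalMs);
          }
        }
        throw lastError != null ? lastError : new IllegalStateException("no attempts configured");
      }
    }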
HMSHANDLERFORCERELOADCONF("hive.hmshandler.force.reload.conf", false, "Whether to force reloading of the HMSHandler configuration (including\n" + @@ -1082,6 +1083,7 @@ public void setSparkConfigUpdated(boolean isSparkConfigUpdated) { "Whether to push predicates down into storage handlers. Ignored when hive.optimize.ppd is false."), // Constant propagation optimizer HIVEOPTCONSTANTPROPAGATION("hive.optimize.constant.propagation", true, "Whether to enable constant propagation optimizer"), + HIVEIDENTITYPROJECTREMOVER("hive.optimize.remove.identity.project", true, "Removes identity project from operator tree"), HIVEMETADATAONLYQUERIES("hive.optimize.metadataonly", true, ""), HIVENULLSCANOPTIMIZE("hive.optimize.null.scan", true, "Dont scan relations which are guaranteed to not generate any rows"), HIVEOPTPPD_STORAGE("hive.optimize.ppd.storage", true, @@ -1670,6 +1672,13 @@ public void setSparkConfigUpdated(boolean isSparkConfigUpdated) { "Minimum number of Thrift worker threads"), HIVE_SERVER2_THRIFT_MAX_WORKER_THREADS("hive.server2.thrift.max.worker.threads", 500, "Maximum number of Thrift worker threads"), + HIVE_SERVER2_THRIFT_LOGIN_BEBACKOFF_SLOT_LENGTH( + "hive.server2.thrift.exponential.backoff.slot.length", "100ms", + new TimeValidator(TimeUnit.MILLISECONDS), + "Binary exponential backoff slot time for Thrift clients during login to HiveServer2,\n" + + "for retries until hitting Thrift client timeout"), + HIVE_SERVER2_THRIFT_LOGIN_TIMEOUT("hive.server2.thrift.login.timeout", "20s", + new TimeValidator(TimeUnit.SECONDS), "Timeout for Thrift clients during login to HiveServer2"), HIVE_SERVER2_THRIFT_WORKER_KEEPALIVE_TIME("hive.server2.thrift.worker.keepalive.time", "60s", new TimeValidator(TimeUnit.SECONDS), "Keepalive time (in seconds) for an idle worker thread. When the number of workers exceeds min workers, " + @@ -1761,6 +1770,13 @@ public void setSparkConfigUpdated(boolean isSparkConfigUpdated) { "If set to true (default), the logged-in user determines the fair scheduler queue\n" + "for submitted jobs, so that map reduce resource usage can be tracked by user.\n" + "If set to false, all Hive jobs go to the 'hive' user's queue."), + HIVE_SERVER2_BUILTIN_UDF_WHITELIST("hive.server2.builtin.udf.whitelist", "", + "Comma separated list of builtin udf names allowed in queries.\n" + + "An empty whitelist allows all builtin udfs to be executed. " + + " The udf black list takes precedence over udf white list"), + HIVE_SERVER2_BUILTIN_UDF_BLACKLIST("hive.server2.builtin.udf.blacklist", "", + "Comma separated list of udfs names. These udfs will not be allowed in queries." 
+ + " The udf black list takes precedence over udf white list"), HIVE_SECURITY_COMMAND_WHITELIST("hive.security.command.whitelist", "set,reset,dfs,add,list,delete,reload,compile", "Comma separated list of non-SQL Hive commands users are authorized to execute"), @@ -2756,8 +2772,7 @@ public static URL getHiveServer2SiteLocation() { */ public String getUser() throws IOException { try { - UserGroupInformation ugi = ShimLoader.getHadoopShims() - .getUGIForConf(this); + UserGroupInformation ugi = Utils.getUGIForConf(this); return ugi.getUserName(); } catch (LoginException le) { throw new IOException(le); diff --git a/common/src/java/org/apache/hive/common/util/HiveStringUtils.java b/common/src/java/org/apache/hive/common/util/HiveStringUtils.java index 2705f1e..78cd983 100644 --- a/common/src/java/org/apache/hive/common/util/HiveStringUtils.java +++ b/common/src/java/org/apache/hive/common/util/HiveStringUtils.java @@ -882,4 +882,8 @@ public static int getTextUtfLength(Text t) { } return len; } + + public static String normalizeIdentifier(String identifier) { + return identifier.trim().toLowerCase(); + } } diff --git a/data/files/parquet_array_null_element.txt b/data/files/parquet_array_null_element.txt new file mode 100644 index 0000000..14046dc --- /dev/null +++ b/data/files/parquet_array_null_element.txt @@ -0,0 +1,3 @@ +1|,7|CARRELAGE,MOQUETTE|key11:value11,key12:value12,key13:value13 +2|,|CAILLEBOTIS,| +3|,42,||key11:value11,key12:,key13: diff --git a/data/files/sour1.txt b/data/files/sour1.txt new file mode 100644 index 0000000..6e245d4 --- /dev/null +++ b/data/files/sour1.txt @@ -0,0 +1,3 @@ +1,a1,a11,a111 +2,a2,a22,a222 +3,a3,a33,a333 diff --git a/data/files/sour2.txt b/data/files/sour2.txt new file mode 100644 index 0000000..4f2ec09 --- /dev/null +++ b/data/files/sour2.txt @@ -0,0 +1,3 @@ +1,b1,b11,b111 +2,b2,b22,b222 +4,b4,b44,b444 diff --git a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java index d1530dc..3218639 100644 --- a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java +++ b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java @@ -42,9 +42,13 @@ import org.apache.hadoop.hbase.mapreduce.TableInputFormatBase; import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil; import org.apache.hadoop.hbase.security.User; +import org.apache.hadoop.hbase.security.token.AuthenticationTokenIdentifier; +import org.apache.hadoop.hbase.security.token.AuthenticationTokenSelector; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.hbase.ColumnMappings.ColumnMapping; +import org.apache.hadoop.hbase.zookeeper.ZKClusterId; +import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; import org.apache.hadoop.hive.metastore.HiveMetaHook; import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.metastore.api.MetaException; @@ -60,11 +64,14 @@ import org.apache.hadoop.hive.serde2.Deserializer; import org.apache.hadoop.hive.serde2.SerDe; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.InputFormat; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.OutputFormat; import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.security.token.Token; import org.apache.hadoop.util.StringUtils; +import org.apache.zookeeper.KeeperException; /** * HBaseStorageHandler 
provides a HiveStorageHandler implementation for @@ -450,13 +457,37 @@ private void addHBaseResources(Configuration jobConf, private void addHBaseDelegationToken(Configuration conf) throws IOException { if (User.isHBaseSecurityEnabled(conf)) { try { - User.getCurrent().obtainAuthTokenForJob(conf,new Job(conf)); + User curUser = User.getCurrent(); + Token authToken = getAuthToken(conf, curUser); + Job job = new Job(conf); + if (authToken == null) { + curUser.obtainAuthTokenForJob(conf,job); + } else { + job.getCredentials().addToken(authToken.getService(), authToken); + } } catch (InterruptedException e) { throw new IOException("Error while obtaining hbase delegation token", e); } } } + /** + * Get the authentication token of the user for the cluster specified in the configuration + * @return null if the user does not have the token, otherwise the auth token for the cluster. + */ + private static Token getAuthToken(Configuration conf, User user) + throws IOException, InterruptedException { + ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "mr-init-credentials", null); + try { + String clusterId = ZKClusterId.readClusterIdZNode(zkw); + return new AuthenticationTokenSelector().selectToken(new Text(clusterId), user.getUGI().getTokens()); + } catch (KeeperException e) { + throw new IOException(e); + } finally { + zkw.close(); + } + } + @Override public void configureJobConf(TableDesc tableDesc, JobConf jobConf) { try { diff --git a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HiveHBaseTableInputFormat.java b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HiveHBaseTableInputFormat.java index 2f18158..8e72759 100644 --- a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HiveHBaseTableInputFormat.java +++ b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HiveHBaseTableInputFormat.java @@ -84,6 +84,7 @@ implements InputFormat { static final Log LOG = LogFactory.getLog(HiveHBaseTableInputFormat.class); + private static final Object hbaseTableMonitor = new Object(); @Override public RecordReader getRecordReader( @@ -427,6 +428,12 @@ static IndexPredicateAnalyzer newIndexPredicateAnalyzer( @Override public InputSplit[] getSplits(JobConf jobConf, int numSplits) throws IOException { + synchronized (hbaseTableMonitor) { + return getSplitsInternal(jobConf, numSplits); + } + } + + private InputSplit[] getSplitsInternal(JobConf jobConf, int numSplits) throws IOException { //obtain delegation tokens for the job if (UserGroupInformation.getCurrentUser().hasKerberosCredentials()) { diff --git a/hbase-handler/src/test/results/positive/hbase_queries.q.out b/hbase-handler/src/test/results/positive/hbase_queries.q.out index b1e7936..0b4ed37 100644 --- a/hbase-handler/src/test/results/positive/hbase_queries.q.out +++ b/hbase-handler/src/test/results/positive/hbase_queries.q.out @@ -268,36 +268,36 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: hbase_table_2 + alias: hbase_table_1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: (key < 120) (type: boolean) + predicate: (100 < key) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: int) + outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) 
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col1 (type: string) TableScan - alias: hbase_table_1 + alias: hbase_table_2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: (100 < key) (type: boolean) + predicate: (key < 120) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: key (type: int) - outputColumnNames: _col0 + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -495,39 +495,31 @@ STAGE PLANS: Filter Operator predicate: UDFToDouble(key) is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Group By Operator + aggregations: count(key) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(key) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Map Reduce @@ -553,7 +545,7 @@ STAGE PLANS: key expressions: UDFToDouble(_col0) (type: double) sort order: + Map-reduce partition columns: UDFToDouble(_col0) (type: double) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reduce Operator Tree: Join Operator @@ -563,14 +555,14 @@ STAGE 
PLANS: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col1} outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), UDFToInteger(_col3) (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.hbase.HiveHBaseTableInputFormat output format: org.apache.hadoop.hive.hbase.HiveHBaseTableOutputFormat diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HiveClientCache.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HiveClientCache.java index ffa648d..df25fea 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HiveClientCache.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HiveClientCache.java @@ -36,6 +36,7 @@ import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.security.UserGroupInformation; import org.apache.thrift.TException; import org.slf4j.Logger; @@ -254,7 +255,7 @@ public CacheableHiveMetaStoreClient call() throws MetaException { private HiveClientCacheKey(HiveConf hiveConf, final int threadId) throws IOException, LoginException { this.metaStoreURIs = hiveConf.getVar(HiveConf.ConfVars.METASTOREURIS); - ugi = ShimLoader.getHadoopShims().getUGIForConf(hiveConf); + ugi = Utils.getUGIForConf(hiveConf); this.hiveConf = hiveConf; this.threadId = threadId; } diff --git a/hcatalog/hcatalog-pig-adapter/pom.xml b/hcatalog/hcatalog-pig-adapter/pom.xml index 2d959e6..53a55dc 100644 --- a/hcatalog/hcatalog-pig-adapter/pom.xml +++ b/hcatalog/hcatalog-pig-adapter/pom.xml @@ -54,6 +54,12 @@ <scope>test</scope> </dependency> + <dependency> + <groupId>jline</groupId> + <artifactId>jline</artifactId> + <version>0.9.94</version> + <scope>test</scope> + </dependency> <dependency> <groupId>org.apache.hive</groupId> <artifactId>hive-exec</artifactId> <version>${project.version}</version> diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/AppConfig.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/AppConfig.java index 77952be..f001f72 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/AppConfig.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/AppConfig.java @@ -28,12 +28,14 @@ import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.StringTokenizer; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.SystemVariables; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.VersionInfo; import org.apache.hive.hcatalog.templeton.tool.JobState; @@ -224,14 +226,8 @@ private static void logConfigLoadAttempt(String path) { * support/debugging. Later it may be worth adding a REST call which will return this data.
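The dumpEnvironent() rewrite this hunk introduces (its body continues just below) routes environment and configuration dumps through a new TempletonUtils.dumpPropMap helper, added further down in this patch, which sorts keys and breaks multi-entry PATH-like values onto one line per element. A hedged sketch of the resulting format; the map contents are invented, and the output shown assumes a Unix ':' path separator:

    import java.util.HashMap;
    import java.util.Map;

    import org.apache.hive.hcatalog.templeton.tool.TempletonUtils;

    public class DumpSketch {
      public static void main(String[] args) {
        Map<String, String> m = new HashMap<String, String>();
        m.put("HADOOP_CLASSPATH", "/opt/a.jar:/opt/b.jar"); // key contains "path", so it is split
        m.put("USER", "hcat");
        // Prints, approximately:
        //   START========demo========:
        //   HADOOP_CLASSPATH=
        //      /opt/a.jar:
        //      /opt/b.jar:
        //   USER=hcat
        //   END========demo========
        System.out.print(TempletonUtils.dumpPropMap("========demo========", m));
      }
    }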
*/ private String dumpEnvironent() { - StringBuilder sb = new StringBuilder("WebHCat environment:\n"); - Map<String, String> env = System.getenv(); - List<String> propKeys = new ArrayList<String>(env.keySet()); - Collections.sort(propKeys); - for(String propKey : propKeys) { - sb.append(propKey).append('=').append(env.get(propKey)).append('\n'); - } - sb.append("Configration properties: \n"); + StringBuilder sb = TempletonUtils.dumpPropMap("========WebHCat System.getenv()========", System.getenv()); + sb.append("START========WebHCat AppConfig.iterator()========: \n"); Iterator<Map.Entry<String, String>> configIter = this.iterator(); List<Map.Entry<String, String>> configVals = new ArrayList<Map.Entry<String, String>>(); while(configIter.hasNext()) { @@ -245,8 +241,19 @@ public int compare(Map.Entry<String, String> ent, Map.Entry<String, String> ent2 }); for(Map.Entry<String, String> entry : configVals) { //use get() to make sure variable substitution works - sb.append(entry.getKey()).append('=').append(get(entry.getKey())).append('\n'); + if(entry.getKey().toLowerCase().contains("path")) { + StringTokenizer st = new StringTokenizer(get(entry.getKey()), File.pathSeparator); + sb.append(entry.getKey()).append("=\n"); + while(st.hasMoreTokens()) { + sb.append(" ").append(st.nextToken()).append(File.pathSeparator).append('\n'); + } + } + else { + sb.append(entry.getKey()).append('=').append(get(entry.getKey())).append('\n'); + } } + sb.append("END========WebHCat AppConfig.iterator()========: \n"); + sb.append(TempletonUtils.dumpPropMap("========WebHCat System.getProperties()========", System.getProperties())); return sb.toString(); } public void startCleanup() { diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/SimpleWebException.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/SimpleWebException.java index 2dfbb5f..9638e11 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/SimpleWebException.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/SimpleWebException.java @@ -32,7 +32,7 @@ * instead map our own so that Jersey doesn't log our exceptions as * error in the output log. See SimpleExceptionMapper. */ -public class SimpleWebException extends Throwable { +public class SimpleWebException extends Exception { public int httpCode; public Map<String, Object> params; diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonUtils.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonUtils.java index 902d786..fb4b8e7 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonUtils.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonUtils.java @@ -18,6 +18,7 @@ */ package org.apache.hive.hcatalog.templeton.tool; +import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; @@ -34,6 +35,8 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Properties; +import java.util.StringTokenizer; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -408,4 +411,34 @@ public static String findContainingJar(Class<?> clazz, String fileNamePattern) { } return null; } + public static StringBuilder dumpPropMap(String header, Properties props) { + Map<String, String> map = new HashMap<String, String>(); + for(Map.Entry<Object, Object> ent : props.entrySet()) { + map.put(ent.getKey().toString(), ent.getValue() == null ?
null : ent.getValue().toString()); + } + return dumpPropMap(header, map); + } + public static StringBuilder dumpPropMap(String header, Map map) { + StringBuilder sb = new StringBuilder("START").append(header).append(":\n"); + List propKeys = new ArrayList(map.keySet()); + Collections.sort(propKeys); + for(String propKey : propKeys) { + if(propKey.toLowerCase().contains("path")) { + StringTokenizer st = new StringTokenizer(map.get(propKey), File.pathSeparator); + if(st.countTokens() > 1) { + sb.append(propKey).append("=\n"); + while (st.hasMoreTokens()) { + sb.append(" ").append(st.nextToken()).append(File.pathSeparator).append('\n'); + } + } + else { + sb.append(propKey).append('=').append(map.get(propKey)).append('\n'); + } + } + else { + sb.append(propKey).append('=').append(map.get(propKey)).append('\n'); + } + } + return sb.append("END").append(header).append('\n'); + } } diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TrivialExecService.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TrivialExecService.java index 32f1ca8..892d79e 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TrivialExecService.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TrivialExecService.java @@ -20,7 +20,6 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.fs.Path; import java.io.File; import java.io.IOException; @@ -28,6 +27,7 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.StringTokenizer; /** @@ -61,16 +61,47 @@ public Process run(List cmd, List removeEnv, pb.environment().remove(key); } pb.environment().putAll(environmentVariables); - logDebugInfo("Starting process with env:", pb.environment()); + logDebugInfo("========Starting process with env:========", pb.environment()); + printContentsOfDir("."); return pb.start(); } private static void logDebugInfo(String msg, Map props) { - LOG.info(msg); - List keys = new ArrayList(); - keys.addAll(props.keySet()); - Collections.sort(keys); - for(String key : keys) { - LOG.info(key + "=" + props.get(key)); - } + StringBuilder sb = TempletonUtils.dumpPropMap(msg, props); + LOG.info(sb.toString()); + String sqoopHome = props.get("SQOOP_HOME"); + if(TempletonUtils.isset(sqoopHome)) { + //this is helpful when Sqoop is installed on each node in the cluster to make sure + //relevant jars (JDBC in particular) are present on the node running the command + printContentsOfDir(sqoopHome + File.separator+ "lib"); + } + } + /** + * Print files and directories in current directory. 
Will list files in the sub-directory (only 1 level deep) + * time honored tradition in WebHCat of borrowing from Oozie + */ + private static void printContentsOfDir(String dir) { + File folder = new File(dir); + StringBuilder sb = new StringBuilder("Files in '").append(dir).append("' dir:").append(folder.getAbsolutePath()).append('\n'); + + File[] listOfFiles = folder.listFiles(); + for (File fileName : listOfFiles) { + if (fileName.isFile()) { + sb.append("File: ").append(fileName.getName()).append('\n'); + } + else if (fileName.isDirectory()) { + sb.append("Dir: ").append(fileName.getName()).append('\n'); + File subDir = new File(fileName.getName()); + File[] moreFiles = subDir.listFiles(); + for (File subFileName : moreFiles) { + if (subFileName.isFile()) { + sb.append("--File: ").append(subFileName.getName()).append('\n'); + } + else if (subFileName.isDirectory()) { + sb.append("--Dir: ").append(subFileName.getName()).append('\n'); + } + } + } + } + LOG.info(sb.toString()); } } diff --git a/itests/hive-minikdc/src/test/java/org/apache/hive/minikdc/MiniHiveKdc.java b/itests/hive-minikdc/src/test/java/org/apache/hive/minikdc/MiniHiveKdc.java index 9bf5e1f..2c47912 100644 --- a/itests/hive-minikdc/src/test/java/org/apache/hive/minikdc/MiniHiveKdc.java +++ b/itests/hive-minikdc/src/test/java/org/apache/hive/minikdc/MiniHiveKdc.java @@ -30,7 +30,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.minikdc.MiniKdc; import org.apache.hadoop.security.GroupMappingServiceProvider; import org.apache.hadoop.security.UserGroupInformation; @@ -129,9 +129,9 @@ public void addUserPrincipal(String principal) throws Exception { */ public UserGroupInformation loginUser(String principal) throws Exception { - ShimLoader.getHadoopShims().loginUserFromKeytab(principal, + UserGroupInformation.loginUserFromKeytab(principal, getKeyTabFile(principal)); - return ShimLoader.getHadoopShims().getUGIForConf(conf); + return Utils.getUGIForConf(conf); } public Properties getKdcConf() { diff --git a/itests/hive-minikdc/src/test/java/org/apache/hive/minikdc/TestJdbcWithMiniKdc.java b/itests/hive-minikdc/src/test/java/org/apache/hive/minikdc/TestJdbcWithMiniKdc.java index 3e46bed..954a452 100644 --- a/itests/hive-minikdc/src/test/java/org/apache/hive/minikdc/TestJdbcWithMiniKdc.java +++ b/itests/hive-minikdc/src/test/java/org/apache/hive/minikdc/TestJdbcWithMiniKdc.java @@ -32,7 +32,7 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hive.jdbc.HiveConnection; import org.apache.hive.jdbc.miniHS2.MiniHS2; @@ -231,7 +231,7 @@ private void verifyProperty(String propertyName, String expectedValue) throws Ex // Store the given token in the UGI private void storeToken(String tokenStr, UserGroupInformation ugi) throws Exception { - ShimLoader.getHadoopShims().setTokenStr(ugi, + Utils.setTokenStr(ugi, tokenStr, HiveAuthFactory.HS2_CLIENT_TOKEN); } diff --git a/itests/hive-minikdc/src/test/java/org/apache/hive/minikdc/TestMiniHiveKdc.java b/itests/hive-minikdc/src/test/java/org/apache/hive/minikdc/TestMiniHiveKdc.java index 9d69952..4998031 100644 --- a/itests/hive-minikdc/src/test/java/org/apache/hive/minikdc/TestMiniHiveKdc.java +++ 
b/itests/hive-minikdc/src/test/java/org/apache/hive/minikdc/TestMiniHiveKdc.java @@ -23,7 +23,7 @@ import java.io.File; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.security.UserGroupInformation; import org.junit.After; import org.junit.AfterClass; @@ -61,9 +61,8 @@ public void testLogin() throws Exception { String servicePrinc = miniHiveKdc.getHiveServicePrincipal(); assertNotNull(servicePrinc); miniHiveKdc.loginUser(servicePrinc); - assertTrue(ShimLoader.getHadoopShims().isLoginKeytabBased()); - UserGroupInformation ugi = - ShimLoader.getHadoopShims().getUGIForConf(hiveConf); + assertTrue(UserGroupInformation.isLoginKeytabBased()); + UserGroupInformation ugi = Utils.getUGIForConf(hiveConf); assertEquals(MiniHiveKdc.HIVE_SERVICE_PRINCIPAL, ugi.getShortUserName()); } diff --git a/itests/hive-unit-hadoop2/src/test/java/org/apache/hadoop/hive/ql/security/TestStorageBasedMetastoreAuthorizationProviderWithACL.java b/itests/hive-unit-hadoop2/src/test/java/org/apache/hadoop/hive/ql/security/TestStorageBasedMetastoreAuthorizationProviderWithACL.java index 9982195..7f063d0 100644 --- a/itests/hive-unit-hadoop2/src/test/java/org/apache/hadoop/hive/ql/security/TestStorageBasedMetastoreAuthorizationProviderWithACL.java +++ b/itests/hive-unit-hadoop2/src/test/java/org/apache/hadoop/hive/ql/security/TestStorageBasedMetastoreAuthorizationProviderWithACL.java @@ -19,6 +19,7 @@ import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.hive.shims.HadoopShims.MiniDFSShim; import org.apache.hadoop.security.UserGroupInformation; @@ -56,7 +57,7 @@ protected HiveConf createHiveConf() throws Exception { // Hadoop FS ACLs do not work with LocalFileSystem, so set up MiniDFS. HiveConf conf = super.createHiveConf(); - String currentUserName = ShimLoader.getHadoopShims().getUGIForConf(conf).getShortUserName(); + String currentUserName = Utils.getUGIForConf(conf).getShortUserName(); conf.set("dfs.namenode.acls.enabled", "true"); conf.set("hadoop.proxyuser." + currentUserName + ".groups", "*"); conf.set("hadoop.proxyuser." 
+ currentUserName + ".hosts", "*"); diff --git a/itests/hive-unit-hadoop2/src/test/java/org/apache/hadoop/hive/thrift/TestHadoop20SAuthBridge.java b/itests/hive-unit-hadoop2/src/test/java/org/apache/hadoop/hive/thrift/TestHadoop20SAuthBridge.java index b2bdafa..3e1ce53 100644 --- a/itests/hive-unit-hadoop2/src/test/java/org/apache/hadoop/hive/thrift/TestHadoop20SAuthBridge.java +++ b/itests/hive-unit-hadoop2/src/test/java/org/apache/hadoop/hive/thrift/TestHadoop20SAuthBridge.java @@ -67,7 +67,7 @@ */ static volatile boolean isMetastoreTokenManagerInited; - private static class MyHadoopThriftAuthBridge20S extends HadoopThriftAuthBridge20S { + private static class MyHadoopThriftAuthBridge20S extends HadoopThriftAuthBridge { @Override public Server createServer(String keytabFile, String principalConf) throws TTransportException { @@ -75,7 +75,7 @@ public Server createServer(String keytabFile, String principalConf) return new Server(); } - static class Server extends HadoopThriftAuthBridge20S.Server { + static class Server extends HadoopThriftAuthBridge.Server { public Server() throws TTransportException { super(); } @@ -312,9 +312,9 @@ private String getDelegationTokenStr(UserGroupInformation ownerUgi, waitForMetastoreTokenInit(); - HadoopThriftAuthBridge20S.Server.authenticationMethod + HadoopThriftAuthBridge.Server.authenticationMethod .set(AuthenticationMethod.KERBEROS); - HadoopThriftAuthBridge20S.Server.remoteAddress.set(InetAddress.getLocalHost()); + HadoopThriftAuthBridge.Server.remoteAddress.set(InetAddress.getLocalHost()); return HiveMetaStore.getDelegationToken(ownerUgi.getShortUserName(), realUgi.getShortUserName()); diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/StorageBasedMetastoreTestBase.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/StorageBasedMetastoreTestBase.java index 1d8ac24..2d03e95 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/StorageBasedMetastoreTestBase.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/StorageBasedMetastoreTestBase.java @@ -36,6 +36,7 @@ import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.ql.WindowsPathUtil; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.Shell; import org.junit.After; @@ -90,7 +91,7 @@ public void setUp() throws Exception { clientHiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); clientHiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); - ugi = ShimLoader.getHadoopShims().getUGIForConf(clientHiveConf); + ugi = Utils.getUGIForConf(clientHiveConf); SessionState.start(new CliSessionState(clientHiveConf)); msc = new HiveMetaStoreClient(clientHiveConf, null); diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/TestClientSideAuthorizationProvider.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/TestClientSideAuthorizationProvider.java index f474d83..e5318ca 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/TestClientSideAuthorizationProvider.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/TestClientSideAuthorizationProvider.java @@ -34,6 +34,7 @@ import org.apache.hadoop.hive.ql.security.authorization.DefaultHiveAuthorizationProvider; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.shims.ShimLoader; 
+import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.security.UserGroupInformation; /** @@ -82,7 +83,7 @@ protected void setUp() throws Exception { clientHiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); clientHiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); - ugi = ShimLoader.getHadoopShims().getUGIForConf(clientHiveConf); + ugi = Utils.getUGIForConf(clientHiveConf); SessionState.start(new CliSessionState(clientHiveConf)); msc = new HiveMetaStoreClient(clientHiveConf, null); diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/TestMetastoreAuthorizationProvider.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/TestMetastoreAuthorizationProvider.java index 3bde2fc..d63b41a 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/TestMetastoreAuthorizationProvider.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/TestMetastoreAuthorizationProvider.java @@ -45,6 +45,7 @@ import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.security.UserGroupInformation; /** @@ -109,7 +110,7 @@ protected void setUp() throws Exception { clientHiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); clientHiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); - ugi = ShimLoader.getHadoopShims().getUGIForConf(clientHiveConf); + ugi = Utils.getUGIForConf(clientHiveConf); SessionState.start(new CliSessionState(clientHiveConf)); msc = new HiveMetaStoreClient(clientHiveConf, null); diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/TestStorageBasedMetastoreAuthorizationDrops.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/TestStorageBasedMetastoreAuthorizationDrops.java index c7b27a6..0a50b79 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/TestStorageBasedMetastoreAuthorizationDrops.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/TestStorageBasedMetastoreAuthorizationDrops.java @@ -27,6 +27,7 @@ import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.hive.shims.HadoopShims.MiniDFSShim; +import org.apache.hadoop.hive.shims.Utils; import org.junit.Assert; import org.junit.Test; @@ -42,7 +43,7 @@ protected HiveConf createHiveConf() throws Exception { // Hadoop FS ACLs do not work with LocalFileSystem, so set up MiniDFS. HiveConf conf = super.createHiveConf(); - String currentUserName = ShimLoader.getHadoopShims().getUGIForConf(conf).getShortUserName(); + String currentUserName = Utils.getUGIForConf(conf).getShortUserName(); conf.set("hadoop.proxyuser." + currentUserName + ".groups", "*"); conf.set("hadoop.proxyuser." 
+ currentUserName + ".hosts", "*"); dfs = ShimLoader.getHadoopShims().getMiniDfs(conf, 4, true, null); diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/thrift/TestZooKeeperTokenStore.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/thrift/TestZooKeeperTokenStore.java index faa51af..9848cc1 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/thrift/TestZooKeeperTokenStore.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/thrift/TestZooKeeperTokenStore.java @@ -70,9 +70,9 @@ protected void tearDown() throws Exception { private Configuration createConf(String zkPath) { Configuration conf = new Configuration(); - conf.set(HadoopThriftAuthBridge20S.Server.DELEGATION_TOKEN_STORE_ZK_CONNECT_STR, "localhost:" + conf.set(HadoopThriftAuthBridge.Server.DELEGATION_TOKEN_STORE_ZK_CONNECT_STR, "localhost:" + this.zkPort); - conf.set(HadoopThriftAuthBridge20S.Server.DELEGATION_TOKEN_STORE_ZK_ZNODE, zkPath); + conf.set(HadoopThriftAuthBridge.Server.DELEGATION_TOKEN_STORE_ZK_ZNODE, zkPath); return conf; } @@ -80,7 +80,7 @@ public void testTokenStorage() throws Exception { String ZK_PATH = "/zktokenstore-testTokenStorage"; ts = new ZooKeeperTokenStore(); Configuration conf = createConf(ZK_PATH); - conf.set(HadoopThriftAuthBridge20S.Server.DELEGATION_TOKEN_STORE_ZK_ACL, "world:anyone:cdrwa"); + conf.set(HadoopThriftAuthBridge.Server.DELEGATION_TOKEN_STORE_ZK_ACL, "world:anyone:cdrwa"); ts.setConf(conf); ts.init(null, ServerMode.METASTORE); @@ -128,7 +128,7 @@ public void testAclNoAuth() throws Exception { String ZK_PATH = "/zktokenstore-testAclNoAuth"; Configuration conf = createConf(ZK_PATH); conf.set( - HadoopThriftAuthBridge20S.Server.DELEGATION_TOKEN_STORE_ZK_ACL, + HadoopThriftAuthBridge.Server.DELEGATION_TOKEN_STORE_ZK_ACL, "ip:127.0.0.1:r"); ts = new ZooKeeperTokenStore(); @@ -146,7 +146,7 @@ public void testAclInvalid() throws Exception { String aclString = "sasl:hive/host@TEST.DOMAIN:cdrwa, fail-parse-ignored"; Configuration conf = createConf(ZK_PATH); conf.set( - HadoopThriftAuthBridge20S.Server.DELEGATION_TOKEN_STORE_ZK_ACL, + HadoopThriftAuthBridge.Server.DELEGATION_TOKEN_STORE_ZK_ACL, aclString); List aclList = ZooKeeperTokenStore.parseACLs(aclString); @@ -166,7 +166,7 @@ public void testAclPositive() throws Exception { String ZK_PATH = "/zktokenstore-testAcl"; Configuration conf = createConf(ZK_PATH); conf.set( - HadoopThriftAuthBridge20S.Server.DELEGATION_TOKEN_STORE_ZK_ACL, + HadoopThriftAuthBridge.Server.DELEGATION_TOKEN_STORE_ZK_ACL, "ip:127.0.0.1:cdrwa,world:anyone:cdrwa"); ts = new ZooKeeperTokenStore(); ts.setConf(conf); diff --git a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java index e0b6558..5087f87 100644 --- a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java +++ b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java @@ -22,6 +22,7 @@ import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; import java.sql.Connection; import java.sql.DriverManager; @@ -30,12 +31,14 @@ import java.sql.Statement; import java.util.HashMap; import java.util.Map; +import java.util.Set; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hive.conf.HiveConf; import 
org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hadoop.hive.ql.exec.FunctionRegistry; import org.apache.hive.jdbc.miniHS2.MiniHS2; import org.junit.After; import org.junit.AfterClass; @@ -387,6 +390,110 @@ public void testSessionScratchDirs() throws Exception { verifyScratchDir(conf, fs, scratchDirPath, expectedFSPermission, userName, true); } + /** Test UDF whitelist + * - verify default value + * - verify udf allowed with default whitelist + * - verify udf allowed with specific whitelist + * - verify udf disallowed when not in whitelist + * @throws Exception + */ + @Test + public void testUdfWhiteList() throws Exception { + HiveConf testConf = new HiveConf(); + assertTrue(testConf.getVar(ConfVars.HIVE_SERVER2_BUILTIN_UDF_WHITELIST).isEmpty()); + // verify that udf in default whitelist can be executed + Statement stmt = hs2Conn.createStatement(); + stmt.executeQuery("SELECT substr('foobar', 4) "); + hs2Conn.close(); + miniHS2.stop(); + + // setup whitelist + Set funcNames = FunctionRegistry.getFunctionNames(); + funcNames.remove("reflect"); + String funcNameStr = ""; + for (String funcName : funcNames) { + funcNameStr += "," + funcName; + } + funcNameStr = funcNameStr.substring(1); // remove ',' at begining + testConf.setVar(ConfVars.HIVE_SERVER2_BUILTIN_UDF_WHITELIST, funcNameStr); + miniHS2 = new MiniHS2(testConf); + miniHS2.start(new HashMap()); + + hs2Conn = getConnection(miniHS2.getJdbcURL(), System.getProperty("user.name"), "bar"); + stmt = hs2Conn.createStatement(); + // verify that udf in whitelist can be executed + stmt.executeQuery("SELECT substr('foobar', 3) "); + + // verify that udf not in whitelist fails + try { + stmt.executeQuery("SELECT reflect('java.lang.String', 'valueOf', 1) "); + fail("reflect() udf invocation should fail"); + } catch (SQLException e) { + // expected + } + } + + /** Test UDF blacklist + * - verify default value + * - verify udfs allowed with default blacklist + * - verify udf disallowed when in blacklist + * @throws Exception + */ + @Test + public void testUdfBlackList() throws Exception { + HiveConf testConf = new HiveConf(); + assertTrue(testConf.getVar(ConfVars.HIVE_SERVER2_BUILTIN_UDF_BLACKLIST).isEmpty()); + + Statement stmt = hs2Conn.createStatement(); + // verify that udf in default whitelist can be executed + stmt.executeQuery("SELECT substr('foobar', 4) "); + + miniHS2.stop(); + testConf.setVar(ConfVars.HIVE_SERVER2_BUILTIN_UDF_BLACKLIST, "reflect"); + miniHS2 = new MiniHS2(testConf); + miniHS2.start(new HashMap()); + hs2Conn = getConnection(miniHS2.getJdbcURL(), System.getProperty("user.name"), "bar"); + stmt = hs2Conn.createStatement(); + + try { + stmt.executeQuery("SELECT reflect('java.lang.String', 'valueOf', 1) "); + fail("reflect() udf invocation should fail"); + } catch (SQLException e) { + // expected + } + } + + /** Test UDF blacklist overrides whitelist + * @throws Exception + */ + @Test + public void testUdfBlackListOverride() throws Exception { + // setup whitelist + HiveConf testConf = new HiveConf(); + + Set funcNames = FunctionRegistry.getFunctionNames(); + String funcNameStr = ""; + for (String funcName : funcNames) { + funcNameStr += "," + funcName; + } + funcNameStr = funcNameStr.substring(1); // remove ',' at begining + testConf.setVar(ConfVars.HIVE_SERVER2_BUILTIN_UDF_WHITELIST, funcNameStr); + testConf.setVar(ConfVars.HIVE_SERVER2_BUILTIN_UDF_BLACKLIST, "reflect"); + miniHS2 = new MiniHS2(testConf); + miniHS2.start(new HashMap()); + + hs2Conn = getConnection(miniHS2.getJdbcURL(), 
System.getProperty("user.name"), "bar"); + Statement stmt = hs2Conn.createStatement(); + + // verify that udf in black list fails even though it's included in whitelist + try { + stmt.executeQuery("SELECT reflect('java.lang.String', 'valueOf', 1) "); + fail("reflect() udf invocation should fail"); + } catch (SQLException e) { + // expected + } + } + /** * Tests the creation of the root hdfs scratch dir, which should be writable by all. * diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 36fa45a..e1665fd 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -269,6 +269,7 @@ minitez.query.files=bucket_map_join_tez1.q,\ dynamic_partition_pruning.q,\ dynamic_partition_pruning_2.q,\ mapjoin_decimal.q,\ + lvj_mapjoin.q, \ mrr.q,\ tez_bmj_schema_evolution.q,\ tez_dml.q,\ diff --git a/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java b/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java index d25800b..d611eb6 100644 --- a/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java +++ b/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java @@ -125,12 +125,10 @@ protected HiveConf conf; private Driver drv; private BaseSemanticAnalyzer sem; - private FileSystem fs; protected final boolean overWrite; private CliDriver cliDriver; private HadoopShims.MiniMrShim mr = null; private HadoopShims.MiniDFSShim dfs = null; - private boolean miniMr = false; private String hadoopVer = null; private QTestSetup setup = null; private boolean isSessionStateStarted = false; @@ -312,7 +310,6 @@ public QTestUtil(String outDir, String logDir, MiniClusterType clusterType, System.out.println("Setting hive-site: "+HiveConf.getHiveSiteLocation()); } conf = new HiveConf(Driver.class); - this.miniMr = (clusterType == MiniClusterType.mr); this.hadoopVer = getHadoopMainVersion(hadoopVer); qMap = new TreeMap(); qSkipSet = new HashSet(); @@ -654,17 +651,6 @@ public void cleanUp() throws Exception { FunctionRegistry.unregisterTemporaryUDF("test_error"); } - private void runLoadCmd(String loadCmd) throws Exception { - int ecode = 0; - ecode = drv.run(loadCmd).getResponseCode(); - drv.close(); - if (ecode != 0) { - throw new Exception("load command: " + loadCmd - + " failed with exit code= " + ecode); - } - return; - } - protected void runCreateTableCmd(String createTableCmd) throws Exception { int ecode = 0; ecode = drv.run(createTableCmd).getResponseCode(); @@ -715,7 +701,6 @@ public void init() throws Exception { SessionState.start(conf); conf.set("hive.execution.engine", execEngine); db = Hive.get(conf); - fs = FileSystem.get(conf); drv = new Driver(conf); drv.init(); pd = new ParseDriver(); diff --git a/jdbc/src/java/org/apache/hive/jdbc/HiveConnection.java b/jdbc/src/java/org/apache/hive/jdbc/HiveConnection.java index cfac55b..764a3f1 100644 --- a/jdbc/src/java/org/apache/hive/jdbc/HiveConnection.java +++ b/jdbc/src/java/org/apache/hive/jdbc/HiveConnection.java @@ -414,8 +414,7 @@ private String getClientDelegationToken(Map jdbcConnConf) if (JdbcConnectionParams.AUTH_TOKEN.equalsIgnoreCase(jdbcConnConf.get(JdbcConnectionParams.AUTH_TYPE))) { // check delegation token in job conf if any try { - tokenStr = ShimLoader.getHadoopShims(). 
- getTokenStrForm(HiveAuthFactory.HS2_CLIENT_TOKEN); + tokenStr = org.apache.hadoop.hive.shims.Utils.getTokenStrForm(HiveAuthFactory.HS2_CLIENT_TOKEN); } catch (IOException e) { throw new SQLException("Error reading token ", e); } diff --git a/metastore/if/hive_metastore.thrift b/metastore/if/hive_metastore.thrift index 2113263..f5a0e64 100755 --- a/metastore/if/hive_metastore.thrift +++ b/metastore/if/hive_metastore.thrift @@ -809,6 +809,9 @@ service ThriftHiveMetastore extends fb303.FacebookService void alter_table_with_environment_context(1:string dbname, 2:string tbl_name, 3:Table new_tbl, 4:EnvironmentContext environment_context) throws (1:InvalidOperationException o1, 2:MetaException o2) + // alter table not only applies to future partitions but also cascade to existing partitions + void alter_table_with_cascade(1:string dbname, 2:string tbl_name, 3:Table new_tbl, 4:bool cascade) + throws (1:InvalidOperationException o1, 2:MetaException o2) // the following applies to only tables that have partitions // * See notes on DDL_TIME Partition add_partition(1:Partition new_part) diff --git a/metastore/scripts/upgrade/postgres/019-HIVE-7784.postgres.sql b/metastore/scripts/upgrade/postgres/019-HIVE-7784.postgres.sql index 8c452a0..ac6b749 100644 --- a/metastore/scripts/upgrade/postgres/019-HIVE-7784.postgres.sql +++ b/metastore/scripts/upgrade/postgres/019-HIVE-7784.postgres.sql @@ -1,29 +1 @@ --- --- Create the table if it doesn't exist. --- - -CREATE TABLE IF NOT EXISTS "PART_COL_STATS" ( - "CS_ID" bigint NOT NULL, - "DB_NAME" character varying(128) DEFAULT NULL::character varying, - "TABLE_NAME" character varying(128) DEFAULT NULL::character varying, - "PARTITION_NAME" character varying(767) DEFAULT NULL::character varying, - "COLUMN_NAME" character varying(128) DEFAULT NULL::character varying, - "COLUMN_TYPE" character varying(128) DEFAULT NULL::character varying, - "PART_ID" bigint NOT NULL, - "LONG_LOW_VALUE" bigint, - "LONG_HIGH_VALUE" bigint, - "DOUBLE_LOW_VALUE" double precision, - "DOUBLE_HIGH_VALUE" double precision, - "BIG_DECIMAL_LOW_VALUE" character varying(4000) DEFAULT NULL::character varying, - "BIG_DECIMAL_HIGH_VALUE" character varying(4000) DEFAULT NULL::character varying, - "NUM_NULLS" bigint NOT NULL, - "NUM_DISTINCTS" bigint, - "AVG_COL_LEN" double precision, - "MAX_COL_LEN" bigint, - "NUM_TRUES" bigint, - "NUM_FALSES" bigint, - "LAST_ANALYZED" bigint NOT NULL -); - - CREATE INDEX "PCS_STATS_IDX" ON "PART_COL_STATS" USING btree ("DB_NAME","TABLE_NAME","COLUMN_NAME","PARTITION_NAME"); diff --git a/metastore/scripts/upgrade/postgres/pre-0-upgrade-0.13.0-to-0.14.0.postgres.sql b/metastore/scripts/upgrade/postgres/pre-0-upgrade-0.13.0-to-0.14.0.postgres.sql new file mode 100644 index 0000000..7d00158 --- /dev/null +++ b/metastore/scripts/upgrade/postgres/pre-0-upgrade-0.13.0-to-0.14.0.postgres.sql @@ -0,0 +1,23 @@ + +CREATE TABLE "PART_COL_STATS" ( + "CS_ID" bigint NOT NULL, + "DB_NAME" character varying(128) DEFAULT NULL::character varying, + "TABLE_NAME" character varying(128) DEFAULT NULL::character varying, + "PARTITION_NAME" character varying(767) DEFAULT NULL::character varying, + "COLUMN_NAME" character varying(128) DEFAULT NULL::character varying, + "COLUMN_TYPE" character varying(128) DEFAULT NULL::character varying, + "PART_ID" bigint NOT NULL, + "LONG_LOW_VALUE" bigint, + "LONG_HIGH_VALUE" bigint, + "DOUBLE_LOW_VALUE" double precision, + "DOUBLE_HIGH_VALUE" double precision, + "BIG_DECIMAL_LOW_VALUE" character varying(4000) DEFAULT NULL::character varying, + 
"BIG_DECIMAL_HIGH_VALUE" character varying(4000) DEFAULT NULL::character varying, + "NUM_NULLS" bigint NOT NULL, + "NUM_DISTINCTS" bigint, + "AVG_COL_LEN" double precision, + "MAX_COL_LEN" bigint, + "NUM_TRUES" bigint, + "NUM_FALSES" bigint, + "LAST_ANALYZED" bigint NOT NULL +); diff --git a/metastore/src/gen/thrift/gen-cpp/ThriftHiveMetastore.cpp b/metastore/src/gen/thrift/gen-cpp/ThriftHiveMetastore.cpp index 2f9c6e0..68cc668 100644 --- a/metastore/src/gen/thrift/gen-cpp/ThriftHiveMetastore.cpp +++ b/metastore/src/gen/thrift/gen-cpp/ThriftHiveMetastore.cpp @@ -5592,6 +5592,236 @@ uint32_t ThriftHiveMetastore_alter_table_with_environment_context_presult::read( return xfer; } +uint32_t ThriftHiveMetastore_alter_table_with_cascade_args::read(::apache::thrift::protocol::TProtocol* iprot) { + + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_STRING) { + xfer += iprot->readString(this->dbname); + this->__isset.dbname = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_STRING) { + xfer += iprot->readString(this->tbl_name); + this->__isset.tbl_name = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->new_tbl.read(iprot); + this->__isset.new_tbl = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 4: + if (ftype == ::apache::thrift::protocol::T_BOOL) { + xfer += iprot->readBool(this->cascade); + this->__isset.cascade = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +uint32_t ThriftHiveMetastore_alter_table_with_cascade_args::write(::apache::thrift::protocol::TProtocol* oprot) const { + uint32_t xfer = 0; + xfer += oprot->writeStructBegin("ThriftHiveMetastore_alter_table_with_cascade_args"); + + xfer += oprot->writeFieldBegin("dbname", ::apache::thrift::protocol::T_STRING, 1); + xfer += oprot->writeString(this->dbname); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("tbl_name", ::apache::thrift::protocol::T_STRING, 2); + xfer += oprot->writeString(this->tbl_name); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("new_tbl", ::apache::thrift::protocol::T_STRUCT, 3); + xfer += this->new_tbl.write(oprot); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("cascade", ::apache::thrift::protocol::T_BOOL, 4); + xfer += oprot->writeBool(this->cascade); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +uint32_t ThriftHiveMetastore_alter_table_with_cascade_pargs::write(::apache::thrift::protocol::TProtocol* oprot) const { + uint32_t xfer = 0; + xfer += oprot->writeStructBegin("ThriftHiveMetastore_alter_table_with_cascade_pargs"); + + xfer += oprot->writeFieldBegin("dbname", ::apache::thrift::protocol::T_STRING, 1); + xfer += oprot->writeString((*(this->dbname))); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("tbl_name", ::apache::thrift::protocol::T_STRING, 2); + 
xfer += oprot->writeString((*(this->tbl_name))); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("new_tbl", ::apache::thrift::protocol::T_STRUCT, 3); + xfer += (*(this->new_tbl)).write(oprot); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("cascade", ::apache::thrift::protocol::T_BOOL, 4); + xfer += oprot->writeBool((*(this->cascade))); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +uint32_t ThriftHiveMetastore_alter_table_with_cascade_result::read(::apache::thrift::protocol::TProtocol* iprot) { + + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->o1.read(iprot); + this->__isset.o1 = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->o2.read(iprot); + this->__isset.o2 = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +uint32_t ThriftHiveMetastore_alter_table_with_cascade_result::write(::apache::thrift::protocol::TProtocol* oprot) const { + + uint32_t xfer = 0; + + xfer += oprot->writeStructBegin("ThriftHiveMetastore_alter_table_with_cascade_result"); + + if (this->__isset.o1) { + xfer += oprot->writeFieldBegin("o1", ::apache::thrift::protocol::T_STRUCT, 1); + xfer += this->o1.write(oprot); + xfer += oprot->writeFieldEnd(); + } else if (this->__isset.o2) { + xfer += oprot->writeFieldBegin("o2", ::apache::thrift::protocol::T_STRUCT, 2); + xfer += this->o2.write(oprot); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +uint32_t ThriftHiveMetastore_alter_table_with_cascade_presult::read(::apache::thrift::protocol::TProtocol* iprot) { + + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->o1.read(iprot); + this->__isset.o1 = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->o2.read(iprot); + this->__isset.o2 = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + uint32_t ThriftHiveMetastore_add_partition_args::read(::apache::thrift::protocol::TProtocol* iprot) { uint32_t xfer = 0; @@ -28447,6 +28677,68 @@ void ThriftHiveMetastoreClient::recv_alter_table_with_environment_context() return; } +void ThriftHiveMetastoreClient::alter_table_with_cascade(const std::string& dbname, const std::string& tbl_name, const Table& new_tbl, const bool cascade) 
+{ + send_alter_table_with_cascade(dbname, tbl_name, new_tbl, cascade); + recv_alter_table_with_cascade(); +} + +void ThriftHiveMetastoreClient::send_alter_table_with_cascade(const std::string& dbname, const std::string& tbl_name, const Table& new_tbl, const bool cascade) +{ + int32_t cseqid = 0; + oprot_->writeMessageBegin("alter_table_with_cascade", ::apache::thrift::protocol::T_CALL, cseqid); + + ThriftHiveMetastore_alter_table_with_cascade_pargs args; + args.dbname = &dbname; + args.tbl_name = &tbl_name; + args.new_tbl = &new_tbl; + args.cascade = &cascade; + args.write(oprot_); + + oprot_->writeMessageEnd(); + oprot_->getTransport()->writeEnd(); + oprot_->getTransport()->flush(); +} + +void ThriftHiveMetastoreClient::recv_alter_table_with_cascade() +{ + + int32_t rseqid = 0; + std::string fname; + ::apache::thrift::protocol::TMessageType mtype; + + iprot_->readMessageBegin(fname, mtype, rseqid); + if (mtype == ::apache::thrift::protocol::T_EXCEPTION) { + ::apache::thrift::TApplicationException x; + x.read(iprot_); + iprot_->readMessageEnd(); + iprot_->getTransport()->readEnd(); + throw x; + } + if (mtype != ::apache::thrift::protocol::T_REPLY) { + iprot_->skip(::apache::thrift::protocol::T_STRUCT); + iprot_->readMessageEnd(); + iprot_->getTransport()->readEnd(); + } + if (fname.compare("alter_table_with_cascade") != 0) { + iprot_->skip(::apache::thrift::protocol::T_STRUCT); + iprot_->readMessageEnd(); + iprot_->getTransport()->readEnd(); + } + ThriftHiveMetastore_alter_table_with_cascade_presult result; + result.read(iprot_); + iprot_->readMessageEnd(); + iprot_->getTransport()->readEnd(); + + if (result.__isset.o1) { + throw result.o1; + } + if (result.__isset.o2) { + throw result.o2; + } + return; +} + void ThriftHiveMetastoreClient::add_partition(Partition& _return, const Partition& new_part) { send_add_partition(new_part); @@ -35825,6 +36117,65 @@ void ThriftHiveMetastoreProcessor::process_alter_table_with_environment_context( } } +void ThriftHiveMetastoreProcessor::process_alter_table_with_cascade(int32_t seqid, ::apache::thrift::protocol::TProtocol* iprot, ::apache::thrift::protocol::TProtocol* oprot, void* callContext) +{ + void* ctx = NULL; + if (this->eventHandler_.get() != NULL) { + ctx = this->eventHandler_->getContext("ThriftHiveMetastore.alter_table_with_cascade", callContext); + } + ::apache::thrift::TProcessorContextFreer freer(this->eventHandler_.get(), ctx, "ThriftHiveMetastore.alter_table_with_cascade"); + + if (this->eventHandler_.get() != NULL) { + this->eventHandler_->preRead(ctx, "ThriftHiveMetastore.alter_table_with_cascade"); + } + + ThriftHiveMetastore_alter_table_with_cascade_args args; + args.read(iprot); + iprot->readMessageEnd(); + uint32_t bytes = iprot->getTransport()->readEnd(); + + if (this->eventHandler_.get() != NULL) { + this->eventHandler_->postRead(ctx, "ThriftHiveMetastore.alter_table_with_cascade", bytes); + } + + ThriftHiveMetastore_alter_table_with_cascade_result result; + try { + iface_->alter_table_with_cascade(args.dbname, args.tbl_name, args.new_tbl, args.cascade); + } catch (InvalidOperationException &o1) { + result.o1 = o1; + result.__isset.o1 = true; + } catch (MetaException &o2) { + result.o2 = o2; + result.__isset.o2 = true; + } catch (const std::exception& e) { + if (this->eventHandler_.get() != NULL) { + this->eventHandler_->handlerError(ctx, "ThriftHiveMetastore.alter_table_with_cascade"); + } + + ::apache::thrift::TApplicationException x(e.what()); + oprot->writeMessageBegin("alter_table_with_cascade", 
::apache::thrift::protocol::T_EXCEPTION, seqid); + x.write(oprot); + oprot->writeMessageEnd(); + oprot->getTransport()->writeEnd(); + oprot->getTransport()->flush(); + return; + } + + if (this->eventHandler_.get() != NULL) { + this->eventHandler_->preWrite(ctx, "ThriftHiveMetastore.alter_table_with_cascade"); + } + + oprot->writeMessageBegin("alter_table_with_cascade", ::apache::thrift::protocol::T_REPLY, seqid); + result.write(oprot); + oprot->writeMessageEnd(); + bytes = oprot->getTransport()->writeEnd(); + oprot->getTransport()->flush(); + + if (this->eventHandler_.get() != NULL) { + this->eventHandler_->postWrite(ctx, "ThriftHiveMetastore.alter_table_with_cascade", bytes); + } +} + void ThriftHiveMetastoreProcessor::process_add_partition(int32_t seqid, ::apache::thrift::protocol::TProtocol* iprot, ::apache::thrift::protocol::TProtocol* oprot, void* callContext) { void* ctx = NULL; diff --git a/metastore/src/gen/thrift/gen-cpp/ThriftHiveMetastore.h b/metastore/src/gen/thrift/gen-cpp/ThriftHiveMetastore.h index c093534..7ebb423 100644 --- a/metastore/src/gen/thrift/gen-cpp/ThriftHiveMetastore.h +++ b/metastore/src/gen/thrift/gen-cpp/ThriftHiveMetastore.h @@ -41,6 +41,7 @@ class ThriftHiveMetastoreIf : virtual public ::facebook::fb303::FacebookService virtual void get_table_names_by_filter(std::vector & _return, const std::string& dbname, const std::string& filter, const int16_t max_tables) = 0; virtual void alter_table(const std::string& dbname, const std::string& tbl_name, const Table& new_tbl) = 0; virtual void alter_table_with_environment_context(const std::string& dbname, const std::string& tbl_name, const Table& new_tbl, const EnvironmentContext& environment_context) = 0; + virtual void alter_table_with_cascade(const std::string& dbname, const std::string& tbl_name, const Table& new_tbl, const bool cascade) = 0; virtual void add_partition(Partition& _return, const Partition& new_part) = 0; virtual void add_partition_with_environment_context(Partition& _return, const Partition& new_part, const EnvironmentContext& environment_context) = 0; virtual int32_t add_partitions(const std::vector & new_parts) = 0; @@ -238,6 +239,9 @@ class ThriftHiveMetastoreNull : virtual public ThriftHiveMetastoreIf , virtual p void alter_table_with_environment_context(const std::string& /* dbname */, const std::string& /* tbl_name */, const Table& /* new_tbl */, const EnvironmentContext& /* environment_context */) { return; } + void alter_table_with_cascade(const std::string& /* dbname */, const std::string& /* tbl_name */, const Table& /* new_tbl */, const bool /* cascade */) { + return; + } void add_partition(Partition& /* _return */, const Partition& /* new_part */) { return; } @@ -3858,6 +3862,151 @@ class ThriftHiveMetastore_alter_table_with_environment_context_presult { }; +typedef struct _ThriftHiveMetastore_alter_table_with_cascade_args__isset { + _ThriftHiveMetastore_alter_table_with_cascade_args__isset() : dbname(false), tbl_name(false), new_tbl(false), cascade(false) {} + bool dbname; + bool tbl_name; + bool new_tbl; + bool cascade; +} _ThriftHiveMetastore_alter_table_with_cascade_args__isset; + +class ThriftHiveMetastore_alter_table_with_cascade_args { + public: + + ThriftHiveMetastore_alter_table_with_cascade_args() : dbname(), tbl_name(), cascade(0) { + } + + virtual ~ThriftHiveMetastore_alter_table_with_cascade_args() throw() {} + + std::string dbname; + std::string tbl_name; + Table new_tbl; + bool cascade; + + _ThriftHiveMetastore_alter_table_with_cascade_args__isset __isset; + + void 
__set_dbname(const std::string& val) { + dbname = val; + } + + void __set_tbl_name(const std::string& val) { + tbl_name = val; + } + + void __set_new_tbl(const Table& val) { + new_tbl = val; + } + + void __set_cascade(const bool val) { + cascade = val; + } + + bool operator == (const ThriftHiveMetastore_alter_table_with_cascade_args & rhs) const + { + if (!(dbname == rhs.dbname)) + return false; + if (!(tbl_name == rhs.tbl_name)) + return false; + if (!(new_tbl == rhs.new_tbl)) + return false; + if (!(cascade == rhs.cascade)) + return false; + return true; + } + bool operator != (const ThriftHiveMetastore_alter_table_with_cascade_args &rhs) const { + return !(*this == rhs); + } + + bool operator < (const ThriftHiveMetastore_alter_table_with_cascade_args & ) const; + + uint32_t read(::apache::thrift::protocol::TProtocol* iprot); + uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; + +}; + + +class ThriftHiveMetastore_alter_table_with_cascade_pargs { + public: + + + virtual ~ThriftHiveMetastore_alter_table_with_cascade_pargs() throw() {} + + const std::string* dbname; + const std::string* tbl_name; + const Table* new_tbl; + const bool* cascade; + + uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; + +}; + +typedef struct _ThriftHiveMetastore_alter_table_with_cascade_result__isset { + _ThriftHiveMetastore_alter_table_with_cascade_result__isset() : o1(false), o2(false) {} + bool o1; + bool o2; +} _ThriftHiveMetastore_alter_table_with_cascade_result__isset; + +class ThriftHiveMetastore_alter_table_with_cascade_result { + public: + + ThriftHiveMetastore_alter_table_with_cascade_result() { + } + + virtual ~ThriftHiveMetastore_alter_table_with_cascade_result() throw() {} + + InvalidOperationException o1; + MetaException o2; + + _ThriftHiveMetastore_alter_table_with_cascade_result__isset __isset; + + void __set_o1(const InvalidOperationException& val) { + o1 = val; + } + + void __set_o2(const MetaException& val) { + o2 = val; + } + + bool operator == (const ThriftHiveMetastore_alter_table_with_cascade_result & rhs) const + { + if (!(o1 == rhs.o1)) + return false; + if (!(o2 == rhs.o2)) + return false; + return true; + } + bool operator != (const ThriftHiveMetastore_alter_table_with_cascade_result &rhs) const { + return !(*this == rhs); + } + + bool operator < (const ThriftHiveMetastore_alter_table_with_cascade_result & ) const; + + uint32_t read(::apache::thrift::protocol::TProtocol* iprot); + uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; + +}; + +typedef struct _ThriftHiveMetastore_alter_table_with_cascade_presult__isset { + _ThriftHiveMetastore_alter_table_with_cascade_presult__isset() : o1(false), o2(false) {} + bool o1; + bool o2; +} _ThriftHiveMetastore_alter_table_with_cascade_presult__isset; + +class ThriftHiveMetastore_alter_table_with_cascade_presult { + public: + + + virtual ~ThriftHiveMetastore_alter_table_with_cascade_presult() throw() {} + + InvalidOperationException o1; + MetaException o2; + + _ThriftHiveMetastore_alter_table_with_cascade_presult__isset __isset; + + uint32_t read(::apache::thrift::protocol::TProtocol* iprot); + +}; + typedef struct _ThriftHiveMetastore_add_partition_args__isset { _ThriftHiveMetastore_add_partition_args__isset() : new_part(false) {} bool new_part; @@ -16505,6 +16654,9 @@ class ThriftHiveMetastoreClient : virtual public ThriftHiveMetastoreIf, public void alter_table_with_environment_context(const std::string& dbname, const std::string& tbl_name, const Table& new_tbl, const EnvironmentContext& 
environment_context); void send_alter_table_with_environment_context(const std::string& dbname, const std::string& tbl_name, const Table& new_tbl, const EnvironmentContext& environment_context); void recv_alter_table_with_environment_context(); + void alter_table_with_cascade(const std::string& dbname, const std::string& tbl_name, const Table& new_tbl, const bool cascade); + void send_alter_table_with_cascade(const std::string& dbname, const std::string& tbl_name, const Table& new_tbl, const bool cascade); + void recv_alter_table_with_cascade(); void add_partition(Partition& _return, const Partition& new_part); void send_add_partition(const Partition& new_part); void recv_add_partition(Partition& _return); @@ -16813,6 +16965,7 @@ class ThriftHiveMetastoreProcessor : public ::facebook::fb303::FacebookServiceP void process_get_table_names_by_filter(int32_t seqid, ::apache::thrift::protocol::TProtocol* iprot, ::apache::thrift::protocol::TProtocol* oprot, void* callContext); void process_alter_table(int32_t seqid, ::apache::thrift::protocol::TProtocol* iprot, ::apache::thrift::protocol::TProtocol* oprot, void* callContext); void process_alter_table_with_environment_context(int32_t seqid, ::apache::thrift::protocol::TProtocol* iprot, ::apache::thrift::protocol::TProtocol* oprot, void* callContext); + void process_alter_table_with_cascade(int32_t seqid, ::apache::thrift::protocol::TProtocol* iprot, ::apache::thrift::protocol::TProtocol* oprot, void* callContext); void process_add_partition(int32_t seqid, ::apache::thrift::protocol::TProtocol* iprot, ::apache::thrift::protocol::TProtocol* oprot, void* callContext); void process_add_partition_with_environment_context(int32_t seqid, ::apache::thrift::protocol::TProtocol* iprot, ::apache::thrift::protocol::TProtocol* oprot, void* callContext); void process_add_partitions(int32_t seqid, ::apache::thrift::protocol::TProtocol* iprot, ::apache::thrift::protocol::TProtocol* oprot, void* callContext); @@ -16933,6 +17086,7 @@ class ThriftHiveMetastoreProcessor : public ::facebook::fb303::FacebookServiceP processMap_["get_table_names_by_filter"] = &ThriftHiveMetastoreProcessor::process_get_table_names_by_filter; processMap_["alter_table"] = &ThriftHiveMetastoreProcessor::process_alter_table; processMap_["alter_table_with_environment_context"] = &ThriftHiveMetastoreProcessor::process_alter_table_with_environment_context; + processMap_["alter_table_with_cascade"] = &ThriftHiveMetastoreProcessor::process_alter_table_with_cascade; processMap_["add_partition"] = &ThriftHiveMetastoreProcessor::process_add_partition; processMap_["add_partition_with_environment_context"] = &ThriftHiveMetastoreProcessor::process_add_partition_with_environment_context; processMap_["add_partitions"] = &ThriftHiveMetastoreProcessor::process_add_partitions; @@ -17295,6 +17449,15 @@ class ThriftHiveMetastoreMultiface : virtual public ThriftHiveMetastoreIf, publi ifaces_[i]->alter_table_with_environment_context(dbname, tbl_name, new_tbl, environment_context); } + void alter_table_with_cascade(const std::string& dbname, const std::string& tbl_name, const Table& new_tbl, const bool cascade) { + size_t sz = ifaces_.size(); + size_t i = 0; + for (; i < (sz - 1); ++i) { + ifaces_[i]->alter_table_with_cascade(dbname, tbl_name, new_tbl, cascade); + } + ifaces_[i]->alter_table_with_cascade(dbname, tbl_name, new_tbl, cascade); + } + void add_partition(Partition& _return, const Partition& new_part) { size_t sz = ifaces_.size(); size_t i = 0; diff --git 
a/metastore/src/gen/thrift/gen-cpp/ThriftHiveMetastore_server.skeleton.cpp b/metastore/src/gen/thrift/gen-cpp/ThriftHiveMetastore_server.skeleton.cpp index 0973e84..e5c1fcb 100644 --- a/metastore/src/gen/thrift/gen-cpp/ThriftHiveMetastore_server.skeleton.cpp +++ b/metastore/src/gen/thrift/gen-cpp/ThriftHiveMetastore_server.skeleton.cpp @@ -147,6 +147,11 @@ class ThriftHiveMetastoreHandler : virtual public ThriftHiveMetastoreIf { printf("alter_table_with_environment_context\n"); } + void alter_table_with_cascade(const std::string& dbname, const std::string& tbl_name, const Table& new_tbl, const bool cascade) { + // Your implementation goes here + printf("alter_table_with_cascade\n"); + } + void add_partition(Partition& _return, const Partition& new_part) { // Your implementation goes here printf("add_partition\n"); diff --git a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/ThriftHiveMetastore.java b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/ThriftHiveMetastore.java index bf62916..09fd71b 100644 --- a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/ThriftHiveMetastore.java +++ b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/ThriftHiveMetastore.java @@ -88,6 +88,8 @@ public void alter_table_with_environment_context(String dbname, String tbl_name, Table new_tbl, EnvironmentContext environment_context) throws InvalidOperationException, MetaException, org.apache.thrift.TException; + public void alter_table_with_cascade(String dbname, String tbl_name, Table new_tbl, boolean cascade) throws InvalidOperationException, MetaException, org.apache.thrift.TException; + public Partition add_partition(Partition new_part) throws InvalidObjectException, AlreadyExistsException, MetaException, org.apache.thrift.TException; public Partition add_partition_with_environment_context(Partition new_part, EnvironmentContext environment_context) throws InvalidObjectException, AlreadyExistsException, MetaException, org.apache.thrift.TException; @@ -324,6 +326,8 @@ public void alter_table_with_environment_context(String dbname, String tbl_name, Table new_tbl, EnvironmentContext environment_context, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; + public void alter_table_with_cascade(String dbname, String tbl_name, Table new_tbl, boolean cascade, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; + public void add_partition(Partition new_part, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; public void add_partition_with_environment_context(Partition new_part, EnvironmentContext environment_context, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; @@ -1256,6 +1260,35 @@ public void recv_alter_table_with_environment_context() throws InvalidOperationE return; } + public void alter_table_with_cascade(String dbname, String tbl_name, Table new_tbl, boolean cascade) throws InvalidOperationException, MetaException, org.apache.thrift.TException + { + send_alter_table_with_cascade(dbname, tbl_name, new_tbl, cascade); + recv_alter_table_with_cascade(); + } + + public void send_alter_table_with_cascade(String dbname, String tbl_name, Table new_tbl, boolean cascade) throws org.apache.thrift.TException + { + alter_table_with_cascade_args args = new alter_table_with_cascade_args(); + args.setDbname(dbname); + 
args.setTbl_name(tbl_name); + args.setNew_tbl(new_tbl); + args.setCascade(cascade); + sendBase("alter_table_with_cascade", args); + } + + public void recv_alter_table_with_cascade() throws InvalidOperationException, MetaException, org.apache.thrift.TException + { + alter_table_with_cascade_result result = new alter_table_with_cascade_result(); + receiveBase(result, "alter_table_with_cascade"); + if (result.o1 != null) { + throw result.o1; + } + if (result.o2 != null) { + throw result.o2; + } + return; + } + public Partition add_partition(Partition new_part) throws InvalidObjectException, AlreadyExistsException, MetaException, org.apache.thrift.TException { send_add_partition(new_part); @@ -4848,6 +4881,47 @@ public void getResult() throws InvalidOperationException, MetaException, org.apa } } + public void alter_table_with_cascade(String dbname, String tbl_name, Table new_tbl, boolean cascade, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { + checkReady(); + alter_table_with_cascade_call method_call = new alter_table_with_cascade_call(dbname, tbl_name, new_tbl, cascade, resultHandler, this, ___protocolFactory, ___transport); + this.___currentMethod = method_call; + ___manager.call(method_call); + } + + public static class alter_table_with_cascade_call extends org.apache.thrift.async.TAsyncMethodCall { + private String dbname; + private String tbl_name; + private Table new_tbl; + private boolean cascade; + public alter_table_with_cascade_call(String dbname, String tbl_name, Table new_tbl, boolean cascade, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { + super(client, protocolFactory, transport, resultHandler, false); + this.dbname = dbname; + this.tbl_name = tbl_name; + this.new_tbl = new_tbl; + this.cascade = cascade; + } + + public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { + prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("alter_table_with_cascade", org.apache.thrift.protocol.TMessageType.CALL, 0)); + alter_table_with_cascade_args args = new alter_table_with_cascade_args(); + args.setDbname(dbname); + args.setTbl_name(tbl_name); + args.setNew_tbl(new_tbl); + args.setCascade(cascade); + args.write(prot); + prot.writeMessageEnd(); + } + + public void getResult() throws InvalidOperationException, MetaException, org.apache.thrift.TException { + if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { + throw new IllegalStateException("Method call not finished!"); + } + org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); + org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); + (new Client(prot)).recv_alter_table_with_cascade(); + } + } + public void add_partition(Partition new_part, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { checkReady(); add_partition_call method_call = new add_partition_call(new_part, resultHandler, this, ___protocolFactory, ___transport); @@ -8167,6 +8241,7 @@ protected Processor(I iface, Map extends org.apache.thrift.ProcessFunction { + public alter_table_with_cascade() { + super("alter_table_with_cascade"); + } + + public 
alter_table_with_cascade_args getEmptyArgsInstance() { + return new alter_table_with_cascade_args(); + } + + protected boolean isOneway() { + return false; + } + + public alter_table_with_cascade_result getResult(I iface, alter_table_with_cascade_args args) throws org.apache.thrift.TException { + alter_table_with_cascade_result result = new alter_table_with_cascade_result(); + try { + iface.alter_table_with_cascade(args.dbname, args.tbl_name, args.new_tbl, args.cascade); + } catch (InvalidOperationException o1) { + result.o1 = o1; + } catch (MetaException o2) { + result.o2 = o2; + } + return result; + } + } + public static class add_partition extends org.apache.thrift.ProcessFunction { public add_partition() { super("add_partition"); @@ -34211,17 +34312,1142 @@ public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache. @Override public String toString() { - StringBuilder sb = new StringBuilder("get_table_names_by_filter_result("); + StringBuilder sb = new StringBuilder("get_table_names_by_filter_result("); + boolean first = true; + + sb.append("success:"); + if (this.success == null) { + sb.append("null"); + } else { + sb.append(this.success); + } + first = false; + if (!first) sb.append(", "); + sb.append("o1:"); + if (this.o1 == null) { + sb.append("null"); + } else { + sb.append(this.o1); + } + first = false; + if (!first) sb.append(", "); + sb.append("o2:"); + if (this.o2 == null) { + sb.append("null"); + } else { + sb.append(this.o2); + } + first = false; + if (!first) sb.append(", "); + sb.append("o3:"); + if (this.o3 == null) { + sb.append("null"); + } else { + sb.append(this.o3); + } + first = false; + sb.append(")"); + return sb.toString(); + } + + public void validate() throws org.apache.thrift.TException { + // check for required fields + // check for sub-struct validity + } + + private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { + try { + write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); + } catch (org.apache.thrift.TException te) { + throw new java.io.IOException(te); + } + } + + private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { + try { + read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); + } catch (org.apache.thrift.TException te) { + throw new java.io.IOException(te); + } + } + + private static class get_table_names_by_filter_resultStandardSchemeFactory implements SchemeFactory { + public get_table_names_by_filter_resultStandardScheme getScheme() { + return new get_table_names_by_filter_resultStandardScheme(); + } + } + + private static class get_table_names_by_filter_resultStandardScheme extends StandardScheme { + + public void read(org.apache.thrift.protocol.TProtocol iprot, get_table_names_by_filter_result struct) throws org.apache.thrift.TException { + org.apache.thrift.protocol.TField schemeField; + iprot.readStructBegin(); + while (true) + { + schemeField = iprot.readFieldBegin(); + if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { + break; + } + switch (schemeField.id) { + case 0: // SUCCESS + if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { + { + org.apache.thrift.protocol.TList _list566 = iprot.readListBegin(); + struct.success = new ArrayList(_list566.size); + for (int _i567 = 0; _i567 < _list566.size; ++_i567) + { + String _elem568; // required + _elem568 = iprot.readString(); + 
struct.success.add(_elem568); + } + iprot.readListEnd(); + } + struct.setSuccessIsSet(true); + } else { + org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); + } + break; + case 1: // O1 + if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { + struct.o1 = new MetaException(); + struct.o1.read(iprot); + struct.setO1IsSet(true); + } else { + org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); + } + break; + case 2: // O2 + if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { + struct.o2 = new InvalidOperationException(); + struct.o2.read(iprot); + struct.setO2IsSet(true); + } else { + org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); + } + break; + case 3: // O3 + if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { + struct.o3 = new UnknownDBException(); + struct.o3.read(iprot); + struct.setO3IsSet(true); + } else { + org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); + } + break; + default: + org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); + } + iprot.readFieldEnd(); + } + iprot.readStructEnd(); + struct.validate(); + } + + public void write(org.apache.thrift.protocol.TProtocol oprot, get_table_names_by_filter_result struct) throws org.apache.thrift.TException { + struct.validate(); + + oprot.writeStructBegin(STRUCT_DESC); + if (struct.success != null) { + oprot.writeFieldBegin(SUCCESS_FIELD_DESC); + { + oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRING, struct.success.size())); + for (String _iter569 : struct.success) + { + oprot.writeString(_iter569); + } + oprot.writeListEnd(); + } + oprot.writeFieldEnd(); + } + if (struct.o1 != null) { + oprot.writeFieldBegin(O1_FIELD_DESC); + struct.o1.write(oprot); + oprot.writeFieldEnd(); + } + if (struct.o2 != null) { + oprot.writeFieldBegin(O2_FIELD_DESC); + struct.o2.write(oprot); + oprot.writeFieldEnd(); + } + if (struct.o3 != null) { + oprot.writeFieldBegin(O3_FIELD_DESC); + struct.o3.write(oprot); + oprot.writeFieldEnd(); + } + oprot.writeFieldStop(); + oprot.writeStructEnd(); + } + + } + + private static class get_table_names_by_filter_resultTupleSchemeFactory implements SchemeFactory { + public get_table_names_by_filter_resultTupleScheme getScheme() { + return new get_table_names_by_filter_resultTupleScheme(); + } + } + + private static class get_table_names_by_filter_resultTupleScheme extends TupleScheme { + + @Override + public void write(org.apache.thrift.protocol.TProtocol prot, get_table_names_by_filter_result struct) throws org.apache.thrift.TException { + TTupleProtocol oprot = (TTupleProtocol) prot; + BitSet optionals = new BitSet(); + if (struct.isSetSuccess()) { + optionals.set(0); + } + if (struct.isSetO1()) { + optionals.set(1); + } + if (struct.isSetO2()) { + optionals.set(2); + } + if (struct.isSetO3()) { + optionals.set(3); + } + oprot.writeBitSet(optionals, 4); + if (struct.isSetSuccess()) { + { + oprot.writeI32(struct.success.size()); + for (String _iter570 : struct.success) + { + oprot.writeString(_iter570); + } + } + } + if (struct.isSetO1()) { + struct.o1.write(oprot); + } + if (struct.isSetO2()) { + struct.o2.write(oprot); + } + if (struct.isSetO3()) { + struct.o3.write(oprot); + } + } + + @Override + public void read(org.apache.thrift.protocol.TProtocol prot, get_table_names_by_filter_result struct) throws org.apache.thrift.TException { + TTupleProtocol iprot = (TTupleProtocol) prot; + BitSet incoming = iprot.readBitSet(4); + 
if (incoming.get(0)) { + { + org.apache.thrift.protocol.TList _list571 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRING, iprot.readI32()); + struct.success = new ArrayList(_list571.size); + for (int _i572 = 0; _i572 < _list571.size; ++_i572) + { + String _elem573; // required + _elem573 = iprot.readString(); + struct.success.add(_elem573); + } + } + struct.setSuccessIsSet(true); + } + if (incoming.get(1)) { + struct.o1 = new MetaException(); + struct.o1.read(iprot); + struct.setO1IsSet(true); + } + if (incoming.get(2)) { + struct.o2 = new InvalidOperationException(); + struct.o2.read(iprot); + struct.setO2IsSet(true); + } + if (incoming.get(3)) { + struct.o3 = new UnknownDBException(); + struct.o3.read(iprot); + struct.setO3IsSet(true); + } + } + } + + } + + public static class alter_table_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { + private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("alter_table_args"); + + private static final org.apache.thrift.protocol.TField DBNAME_FIELD_DESC = new org.apache.thrift.protocol.TField("dbname", org.apache.thrift.protocol.TType.STRING, (short)1); + private static final org.apache.thrift.protocol.TField TBL_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("tbl_name", org.apache.thrift.protocol.TType.STRING, (short)2); + private static final org.apache.thrift.protocol.TField NEW_TBL_FIELD_DESC = new org.apache.thrift.protocol.TField("new_tbl", org.apache.thrift.protocol.TType.STRUCT, (short)3); + + private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); + static { + schemes.put(StandardScheme.class, new alter_table_argsStandardSchemeFactory()); + schemes.put(TupleScheme.class, new alter_table_argsTupleSchemeFactory()); + } + + private String dbname; // required + private String tbl_name; // required + private Table new_tbl; // required + + /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ + public enum _Fields implements org.apache.thrift.TFieldIdEnum { + DBNAME((short)1, "dbname"), + TBL_NAME((short)2, "tbl_name"), + NEW_TBL((short)3, "new_tbl"); + + private static final Map byName = new HashMap(); + + static { + for (_Fields field : EnumSet.allOf(_Fields.class)) { + byName.put(field.getFieldName(), field); + } + } + + /** + * Find the _Fields constant that matches fieldId, or null if its not found. + */ + public static _Fields findByThriftId(int fieldId) { + switch(fieldId) { + case 1: // DBNAME + return DBNAME; + case 2: // TBL_NAME + return TBL_NAME; + case 3: // NEW_TBL + return NEW_TBL; + default: + return null; + } + } + + /** + * Find the _Fields constant that matches fieldId, throwing an exception + * if it is not found. + */ + public static _Fields findByThriftIdOrThrow(int fieldId) { + _Fields fields = findByThriftId(fieldId); + if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); + return fields; + } + + /** + * Find the _Fields constant that matches name, or null if its not found. 
+ */ + public static _Fields findByName(String name) { + return byName.get(name); + } + + private final short _thriftId; + private final String _fieldName; + + _Fields(short thriftId, String fieldName) { + _thriftId = thriftId; + _fieldName = fieldName; + } + + public short getThriftFieldId() { + return _thriftId; + } + + public String getFieldName() { + return _fieldName; + } + } + + // isset id assignments + public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; + static { + Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); + tmpMap.put(_Fields.DBNAME, new org.apache.thrift.meta_data.FieldMetaData("dbname", org.apache.thrift.TFieldRequirementType.DEFAULT, + new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); + tmpMap.put(_Fields.TBL_NAME, new org.apache.thrift.meta_data.FieldMetaData("tbl_name", org.apache.thrift.TFieldRequirementType.DEFAULT, + new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); + tmpMap.put(_Fields.NEW_TBL, new org.apache.thrift.meta_data.FieldMetaData("new_tbl", org.apache.thrift.TFieldRequirementType.DEFAULT, + new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, Table.class))); + metaDataMap = Collections.unmodifiableMap(tmpMap); + org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(alter_table_args.class, metaDataMap); + } + + public alter_table_args() { + } + + public alter_table_args( + String dbname, + String tbl_name, + Table new_tbl) + { + this(); + this.dbname = dbname; + this.tbl_name = tbl_name; + this.new_tbl = new_tbl; + } + + /** + * Performs a deep copy on other. 
+ */ + public alter_table_args(alter_table_args other) { + if (other.isSetDbname()) { + this.dbname = other.dbname; + } + if (other.isSetTbl_name()) { + this.tbl_name = other.tbl_name; + } + if (other.isSetNew_tbl()) { + this.new_tbl = new Table(other.new_tbl); + } + } + + public alter_table_args deepCopy() { + return new alter_table_args(this); + } + + @Override + public void clear() { + this.dbname = null; + this.tbl_name = null; + this.new_tbl = null; + } + + public String getDbname() { + return this.dbname; + } + + public void setDbname(String dbname) { + this.dbname = dbname; + } + + public void unsetDbname() { + this.dbname = null; + } + + /** Returns true if field dbname is set (has been assigned a value) and false otherwise */ + public boolean isSetDbname() { + return this.dbname != null; + } + + public void setDbnameIsSet(boolean value) { + if (!value) { + this.dbname = null; + } + } + + public String getTbl_name() { + return this.tbl_name; + } + + public void setTbl_name(String tbl_name) { + this.tbl_name = tbl_name; + } + + public void unsetTbl_name() { + this.tbl_name = null; + } + + /** Returns true if field tbl_name is set (has been assigned a value) and false otherwise */ + public boolean isSetTbl_name() { + return this.tbl_name != null; + } + + public void setTbl_nameIsSet(boolean value) { + if (!value) { + this.tbl_name = null; + } + } + + public Table getNew_tbl() { + return this.new_tbl; + } + + public void setNew_tbl(Table new_tbl) { + this.new_tbl = new_tbl; + } + + public void unsetNew_tbl() { + this.new_tbl = null; + } + + /** Returns true if field new_tbl is set (has been assigned a value) and false otherwise */ + public boolean isSetNew_tbl() { + return this.new_tbl != null; + } + + public void setNew_tblIsSet(boolean value) { + if (!value) { + this.new_tbl = null; + } + } + + public void setFieldValue(_Fields field, Object value) { + switch (field) { + case DBNAME: + if (value == null) { + unsetDbname(); + } else { + setDbname((String)value); + } + break; + + case TBL_NAME: + if (value == null) { + unsetTbl_name(); + } else { + setTbl_name((String)value); + } + break; + + case NEW_TBL: + if (value == null) { + unsetNew_tbl(); + } else { + setNew_tbl((Table)value); + } + break; + + } + } + + public Object getFieldValue(_Fields field) { + switch (field) { + case DBNAME: + return getDbname(); + + case TBL_NAME: + return getTbl_name(); + + case NEW_TBL: + return getNew_tbl(); + + } + throw new IllegalStateException(); + } + + /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ + public boolean isSet(_Fields field) { + if (field == null) { + throw new IllegalArgumentException(); + } + + switch (field) { + case DBNAME: + return isSetDbname(); + case TBL_NAME: + return isSetTbl_name(); + case NEW_TBL: + return isSetNew_tbl(); + } + throw new IllegalStateException(); + } + + @Override + public boolean equals(Object that) { + if (that == null) + return false; + if (that instanceof alter_table_args) + return this.equals((alter_table_args)that); + return false; + } + + public boolean equals(alter_table_args that) { + if (that == null) + return false; + + boolean this_present_dbname = true && this.isSetDbname(); + boolean that_present_dbname = true && that.isSetDbname(); + if (this_present_dbname || that_present_dbname) { + if (!(this_present_dbname && that_present_dbname)) + return false; + if (!this.dbname.equals(that.dbname)) + return false; + } + + boolean this_present_tbl_name = true && this.isSetTbl_name(); + 
boolean that_present_tbl_name = true && that.isSetTbl_name(); + if (this_present_tbl_name || that_present_tbl_name) { + if (!(this_present_tbl_name && that_present_tbl_name)) + return false; + if (!this.tbl_name.equals(that.tbl_name)) + return false; + } + + boolean this_present_new_tbl = true && this.isSetNew_tbl(); + boolean that_present_new_tbl = true && that.isSetNew_tbl(); + if (this_present_new_tbl || that_present_new_tbl) { + if (!(this_present_new_tbl && that_present_new_tbl)) + return false; + if (!this.new_tbl.equals(that.new_tbl)) + return false; + } + + return true; + } + + @Override + public int hashCode() { + HashCodeBuilder builder = new HashCodeBuilder(); + + boolean present_dbname = true && (isSetDbname()); + builder.append(present_dbname); + if (present_dbname) + builder.append(dbname); + + boolean present_tbl_name = true && (isSetTbl_name()); + builder.append(present_tbl_name); + if (present_tbl_name) + builder.append(tbl_name); + + boolean present_new_tbl = true && (isSetNew_tbl()); + builder.append(present_new_tbl); + if (present_new_tbl) + builder.append(new_tbl); + + return builder.toHashCode(); + } + + public int compareTo(alter_table_args other) { + if (!getClass().equals(other.getClass())) { + return getClass().getName().compareTo(other.getClass().getName()); + } + + int lastComparison = 0; + alter_table_args typedOther = (alter_table_args)other; + + lastComparison = Boolean.valueOf(isSetDbname()).compareTo(typedOther.isSetDbname()); + if (lastComparison != 0) { + return lastComparison; + } + if (isSetDbname()) { + lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.dbname, typedOther.dbname); + if (lastComparison != 0) { + return lastComparison; + } + } + lastComparison = Boolean.valueOf(isSetTbl_name()).compareTo(typedOther.isSetTbl_name()); + if (lastComparison != 0) { + return lastComparison; + } + if (isSetTbl_name()) { + lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.tbl_name, typedOther.tbl_name); + if (lastComparison != 0) { + return lastComparison; + } + } + lastComparison = Boolean.valueOf(isSetNew_tbl()).compareTo(typedOther.isSetNew_tbl()); + if (lastComparison != 0) { + return lastComparison; + } + if (isSetNew_tbl()) { + lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.new_tbl, typedOther.new_tbl); + if (lastComparison != 0) { + return lastComparison; + } + } + return 0; + } + + public _Fields fieldForId(int fieldId) { + return _Fields.findByThriftId(fieldId); + } + + public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { + schemes.get(iprot.getScheme()).getScheme().read(iprot, this); + } + + public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { + schemes.get(oprot.getScheme()).getScheme().write(oprot, this); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder("alter_table_args("); + boolean first = true; + + sb.append("dbname:"); + if (this.dbname == null) { + sb.append("null"); + } else { + sb.append(this.dbname); + } + first = false; + if (!first) sb.append(", "); + sb.append("tbl_name:"); + if (this.tbl_name == null) { + sb.append("null"); + } else { + sb.append(this.tbl_name); + } + first = false; + if (!first) sb.append(", "); + sb.append("new_tbl:"); + if (this.new_tbl == null) { + sb.append("null"); + } else { + sb.append(this.new_tbl); + } + first = false; + sb.append(")"); + return sb.toString(); + } + + public void validate() throws org.apache.thrift.TException { + // check for 
required fields + // check for sub-struct validity + if (new_tbl != null) { + new_tbl.validate(); + } + } + + private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { + try { + write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); + } catch (org.apache.thrift.TException te) { + throw new java.io.IOException(te); + } + } + + private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { + try { + read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); + } catch (org.apache.thrift.TException te) { + throw new java.io.IOException(te); + } + } + + private static class alter_table_argsStandardSchemeFactory implements SchemeFactory { + public alter_table_argsStandardScheme getScheme() { + return new alter_table_argsStandardScheme(); + } + } + + private static class alter_table_argsStandardScheme extends StandardScheme { + + public void read(org.apache.thrift.protocol.TProtocol iprot, alter_table_args struct) throws org.apache.thrift.TException { + org.apache.thrift.protocol.TField schemeField; + iprot.readStructBegin(); + while (true) + { + schemeField = iprot.readFieldBegin(); + if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { + break; + } + switch (schemeField.id) { + case 1: // DBNAME + if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { + struct.dbname = iprot.readString(); + struct.setDbnameIsSet(true); + } else { + org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); + } + break; + case 2: // TBL_NAME + if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { + struct.tbl_name = iprot.readString(); + struct.setTbl_nameIsSet(true); + } else { + org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); + } + break; + case 3: // NEW_TBL + if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { + struct.new_tbl = new Table(); + struct.new_tbl.read(iprot); + struct.setNew_tblIsSet(true); + } else { + org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); + } + break; + default: + org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); + } + iprot.readFieldEnd(); + } + iprot.readStructEnd(); + struct.validate(); + } + + public void write(org.apache.thrift.protocol.TProtocol oprot, alter_table_args struct) throws org.apache.thrift.TException { + struct.validate(); + + oprot.writeStructBegin(STRUCT_DESC); + if (struct.dbname != null) { + oprot.writeFieldBegin(DBNAME_FIELD_DESC); + oprot.writeString(struct.dbname); + oprot.writeFieldEnd(); + } + if (struct.tbl_name != null) { + oprot.writeFieldBegin(TBL_NAME_FIELD_DESC); + oprot.writeString(struct.tbl_name); + oprot.writeFieldEnd(); + } + if (struct.new_tbl != null) { + oprot.writeFieldBegin(NEW_TBL_FIELD_DESC); + struct.new_tbl.write(oprot); + oprot.writeFieldEnd(); + } + oprot.writeFieldStop(); + oprot.writeStructEnd(); + } + + } + + private static class alter_table_argsTupleSchemeFactory implements SchemeFactory { + public alter_table_argsTupleScheme getScheme() { + return new alter_table_argsTupleScheme(); + } + } + + private static class alter_table_argsTupleScheme extends TupleScheme { + + @Override + public void write(org.apache.thrift.protocol.TProtocol prot, alter_table_args struct) throws org.apache.thrift.TException { + TTupleProtocol oprot = (TTupleProtocol) prot; + BitSet optionals = new BitSet(); + if (struct.isSetDbname()) { + 
optionals.set(0); + } + if (struct.isSetTbl_name()) { + optionals.set(1); + } + if (struct.isSetNew_tbl()) { + optionals.set(2); + } + oprot.writeBitSet(optionals, 3); + if (struct.isSetDbname()) { + oprot.writeString(struct.dbname); + } + if (struct.isSetTbl_name()) { + oprot.writeString(struct.tbl_name); + } + if (struct.isSetNew_tbl()) { + struct.new_tbl.write(oprot); + } + } + + @Override + public void read(org.apache.thrift.protocol.TProtocol prot, alter_table_args struct) throws org.apache.thrift.TException { + TTupleProtocol iprot = (TTupleProtocol) prot; + BitSet incoming = iprot.readBitSet(3); + if (incoming.get(0)) { + struct.dbname = iprot.readString(); + struct.setDbnameIsSet(true); + } + if (incoming.get(1)) { + struct.tbl_name = iprot.readString(); + struct.setTbl_nameIsSet(true); + } + if (incoming.get(2)) { + struct.new_tbl = new Table(); + struct.new_tbl.read(iprot); + struct.setNew_tblIsSet(true); + } + } + } + + } + + public static class alter_table_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { + private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("alter_table_result"); + + private static final org.apache.thrift.protocol.TField O1_FIELD_DESC = new org.apache.thrift.protocol.TField("o1", org.apache.thrift.protocol.TType.STRUCT, (short)1); + private static final org.apache.thrift.protocol.TField O2_FIELD_DESC = new org.apache.thrift.protocol.TField("o2", org.apache.thrift.protocol.TType.STRUCT, (short)2); + + private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); + static { + schemes.put(StandardScheme.class, new alter_table_resultStandardSchemeFactory()); + schemes.put(TupleScheme.class, new alter_table_resultTupleSchemeFactory()); + } + + private InvalidOperationException o1; // required + private MetaException o2; // required + + /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ + public enum _Fields implements org.apache.thrift.TFieldIdEnum { + O1((short)1, "o1"), + O2((short)2, "o2"); + + private static final Map byName = new HashMap(); + + static { + for (_Fields field : EnumSet.allOf(_Fields.class)) { + byName.put(field.getFieldName(), field); + } + } + + /** + * Find the _Fields constant that matches fieldId, or null if its not found. + */ + public static _Fields findByThriftId(int fieldId) { + switch(fieldId) { + case 1: // O1 + return O1; + case 2: // O2 + return O2; + default: + return null; + } + } + + /** + * Find the _Fields constant that matches fieldId, throwing an exception + * if it is not found. + */ + public static _Fields findByThriftIdOrThrow(int fieldId) { + _Fields fields = findByThriftId(fieldId); + if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); + return fields; + } + + /** + * Find the _Fields constant that matches name, or null if its not found. 
+ */ + public static _Fields findByName(String name) { + return byName.get(name); + } + + private final short _thriftId; + private final String _fieldName; + + _Fields(short thriftId, String fieldName) { + _thriftId = thriftId; + _fieldName = fieldName; + } + + public short getThriftFieldId() { + return _thriftId; + } + + public String getFieldName() { + return _fieldName; + } + } + + // isset id assignments + public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; + static { + Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); + tmpMap.put(_Fields.O1, new org.apache.thrift.meta_data.FieldMetaData("o1", org.apache.thrift.TFieldRequirementType.DEFAULT, + new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRUCT))); + tmpMap.put(_Fields.O2, new org.apache.thrift.meta_data.FieldMetaData("o2", org.apache.thrift.TFieldRequirementType.DEFAULT, + new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRUCT))); + metaDataMap = Collections.unmodifiableMap(tmpMap); + org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(alter_table_result.class, metaDataMap); + } + + public alter_table_result() { + } + + public alter_table_result( + InvalidOperationException o1, + MetaException o2) + { + this(); + this.o1 = o1; + this.o2 = o2; + } + + /** + * Performs a deep copy on other. + */ + public alter_table_result(alter_table_result other) { + if (other.isSetO1()) { + this.o1 = new InvalidOperationException(other.o1); + } + if (other.isSetO2()) { + this.o2 = new MetaException(other.o2); + } + } + + public alter_table_result deepCopy() { + return new alter_table_result(this); + } + + @Override + public void clear() { + this.o1 = null; + this.o2 = null; + } + + public InvalidOperationException getO1() { + return this.o1; + } + + public void setO1(InvalidOperationException o1) { + this.o1 = o1; + } + + public void unsetO1() { + this.o1 = null; + } + + /** Returns true if field o1 is set (has been assigned a value) and false otherwise */ + public boolean isSetO1() { + return this.o1 != null; + } + + public void setO1IsSet(boolean value) { + if (!value) { + this.o1 = null; + } + } + + public MetaException getO2() { + return this.o2; + } + + public void setO2(MetaException o2) { + this.o2 = o2; + } + + public void unsetO2() { + this.o2 = null; + } + + /** Returns true if field o2 is set (has been assigned a value) and false otherwise */ + public boolean isSetO2() { + return this.o2 != null; + } + + public void setO2IsSet(boolean value) { + if (!value) { + this.o2 = null; + } + } + + public void setFieldValue(_Fields field, Object value) { + switch (field) { + case O1: + if (value == null) { + unsetO1(); + } else { + setO1((InvalidOperationException)value); + } + break; + + case O2: + if (value == null) { + unsetO2(); + } else { + setO2((MetaException)value); + } + break; + + } + } + + public Object getFieldValue(_Fields field) { + switch (field) { + case O1: + return getO1(); + + case O2: + return getO2(); + + } + throw new IllegalStateException(); + } + + /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ + public boolean isSet(_Fields field) { + if (field == null) { + throw new IllegalArgumentException(); + } + + switch (field) { + case O1: + return isSetO1(); + case O2: + return isSetO2(); + } + throw new IllegalStateException(); + } + + @Override + public 
boolean equals(Object that) { + if (that == null) + return false; + if (that instanceof alter_table_result) + return this.equals((alter_table_result)that); + return false; + } + + public boolean equals(alter_table_result that) { + if (that == null) + return false; + + boolean this_present_o1 = true && this.isSetO1(); + boolean that_present_o1 = true && that.isSetO1(); + if (this_present_o1 || that_present_o1) { + if (!(this_present_o1 && that_present_o1)) + return false; + if (!this.o1.equals(that.o1)) + return false; + } + + boolean this_present_o2 = true && this.isSetO2(); + boolean that_present_o2 = true && that.isSetO2(); + if (this_present_o2 || that_present_o2) { + if (!(this_present_o2 && that_present_o2)) + return false; + if (!this.o2.equals(that.o2)) + return false; + } + + return true; + } + + @Override + public int hashCode() { + HashCodeBuilder builder = new HashCodeBuilder(); + + boolean present_o1 = true && (isSetO1()); + builder.append(present_o1); + if (present_o1) + builder.append(o1); + + boolean present_o2 = true && (isSetO2()); + builder.append(present_o2); + if (present_o2) + builder.append(o2); + + return builder.toHashCode(); + } + + public int compareTo(alter_table_result other) { + if (!getClass().equals(other.getClass())) { + return getClass().getName().compareTo(other.getClass().getName()); + } + + int lastComparison = 0; + alter_table_result typedOther = (alter_table_result)other; + + lastComparison = Boolean.valueOf(isSetO1()).compareTo(typedOther.isSetO1()); + if (lastComparison != 0) { + return lastComparison; + } + if (isSetO1()) { + lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.o1, typedOther.o1); + if (lastComparison != 0) { + return lastComparison; + } + } + lastComparison = Boolean.valueOf(isSetO2()).compareTo(typedOther.isSetO2()); + if (lastComparison != 0) { + return lastComparison; + } + if (isSetO2()) { + lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.o2, typedOther.o2); + if (lastComparison != 0) { + return lastComparison; + } + } + return 0; + } + + public _Fields fieldForId(int fieldId) { + return _Fields.findByThriftId(fieldId); + } + + public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { + schemes.get(iprot.getScheme()).getScheme().read(iprot, this); + } + + public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { + schemes.get(oprot.getScheme()).getScheme().write(oprot, this); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder("alter_table_result("); boolean first = true; - sb.append("success:"); - if (this.success == null) { - sb.append("null"); - } else { - sb.append(this.success); - } - first = false; - if (!first) sb.append(", "); sb.append("o1:"); if (this.o1 == null) { sb.append("null"); @@ -34237,14 +35463,6 @@ public String toString() { sb.append(this.o2); } first = false; - if (!first) sb.append(", "); - sb.append("o3:"); - if (this.o3 == null) { - sb.append("null"); - } else { - sb.append(this.o3); - } - first = false; sb.append(")"); return sb.toString(); } @@ -34270,15 +35488,15 @@ private void readObject(java.io.ObjectInputStream in) throws java.io.IOException } } - private static class get_table_names_by_filter_resultStandardSchemeFactory implements SchemeFactory { - public get_table_names_by_filter_resultStandardScheme getScheme() { - return new get_table_names_by_filter_resultStandardScheme(); + private static class alter_table_resultStandardSchemeFactory implements 
SchemeFactory { + public alter_table_resultStandardScheme getScheme() { + return new alter_table_resultStandardScheme(); } } - private static class get_table_names_by_filter_resultStandardScheme extends StandardScheme<get_table_names_by_filter_result> { + private static class alter_table_resultStandardScheme extends StandardScheme<alter_table_result> { - public void read(org.apache.thrift.protocol.TProtocol iprot, get_table_names_by_filter_result struct) throws org.apache.thrift.TException { + public void read(org.apache.thrift.protocol.TProtocol iprot, alter_table_result struct) throws org.apache.thrift.TException { org.apache.thrift.protocol.TField schemeField; iprot.readStructBegin(); while (true) @@ -34288,27 +35506,9 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, get_table_names_by_ break; } switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list566 = iprot.readListBegin(); - struct.success = new ArrayList<String>(_list566.size); - for (int _i567 = 0; _i567 < _list566.size; ++_i567) - { - String _elem568; // required - _elem568 = iprot.readString(); - struct.success.add(_elem568); - } - iprot.readListEnd(); - } - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; case 1: // O1 if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.o1 = new MetaException(); + struct.o1 = new InvalidOperationException(); struct.o1.read(iprot); struct.setO1IsSet(true); } else { @@ -34317,22 +35517,13 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, get_table_names_by_ break; case 2: // O2 if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.o2 = new InvalidOperationException(); + struct.o2 = new MetaException(); struct.o2.read(iprot); struct.setO2IsSet(true); } else { org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); } break; - case 3: // O3 - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.o3 = new UnknownDBException(); - struct.o3.read(iprot); - struct.setO3IsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; default: org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); } @@ -34342,22 +35533,10 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, get_table_names_by_ struct.validate(); } - public void write(org.apache.thrift.protocol.TProtocol oprot, get_table_names_by_filter_result struct) throws org.apache.thrift.TException { + public void write(org.apache.thrift.protocol.TProtocol oprot, alter_table_result struct) throws org.apache.thrift.TException { struct.validate(); oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRING, struct.success.size())); - for (String _iter569 : struct.success) - { - oprot.writeString(_iter569); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } if (struct.o1 != null) { oprot.writeFieldBegin(O1_FIELD_DESC); struct.o1.write(oprot); @@ -34368,121 +35547,83 @@ public void write(org.apache.thrift.protocol.TProtocol oprot, get_table_names_by struct.o2.write(oprot); oprot.writeFieldEnd(); } - if (struct.o3 != null) { - oprot.writeFieldBegin(O3_FIELD_DESC); - struct.o3.write(oprot); - oprot.writeFieldEnd(); - } oprot.writeFieldStop(); oprot.writeStructEnd(); } } - private static 
class get_table_names_by_filter_resultTupleSchemeFactory implements SchemeFactory { - public get_table_names_by_filter_resultTupleScheme getScheme() { - return new get_table_names_by_filter_resultTupleScheme(); + private static class alter_table_resultTupleSchemeFactory implements SchemeFactory { + public alter_table_resultTupleScheme getScheme() { + return new alter_table_resultTupleScheme(); } } - private static class get_table_names_by_filter_resultTupleScheme extends TupleScheme<get_table_names_by_filter_result> { + private static class alter_table_resultTupleScheme extends TupleScheme<alter_table_result> { @Override - public void write(org.apache.thrift.protocol.TProtocol prot, get_table_names_by_filter_result struct) throws org.apache.thrift.TException { + public void write(org.apache.thrift.protocol.TProtocol prot, alter_table_result struct) throws org.apache.thrift.TException { TTupleProtocol oprot = (TTupleProtocol) prot; BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } if (struct.isSetO1()) { - optionals.set(1); + optionals.set(0); } if (struct.isSetO2()) { - optionals.set(2); - } - if (struct.isSetO3()) { - optionals.set(3); - } - oprot.writeBitSet(optionals, 4); - if (struct.isSetSuccess()) { - { - oprot.writeI32(struct.success.size()); - for (String _iter570 : struct.success) - { - oprot.writeString(_iter570); - } - } + optionals.set(1); } + oprot.writeBitSet(optionals, 2); if (struct.isSetO1()) { struct.o1.write(oprot); } if (struct.isSetO2()) { struct.o2.write(oprot); } - if (struct.isSetO3()) { - struct.o3.write(oprot); - } } @Override - public void read(org.apache.thrift.protocol.TProtocol prot, get_table_names_by_filter_result struct) throws org.apache.thrift.TException { + public void read(org.apache.thrift.protocol.TProtocol prot, alter_table_result struct) throws org.apache.thrift.TException { TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(4); + BitSet incoming = iprot.readBitSet(2); if (incoming.get(0)) { - { - org.apache.thrift.protocol.TList _list571 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRING, iprot.readI32()); - struct.success = new ArrayList<String>(_list571.size); - for (int _i572 = 0; _i572 < _list571.size; ++_i572) - { - String _elem573; // required - _elem573 = iprot.readString(); - struct.success.add(_elem573); - } - } - struct.setSuccessIsSet(true); - } - if (incoming.get(1)) { - struct.o1 = new MetaException(); + struct.o1 = new InvalidOperationException(); struct.o1.read(iprot); struct.setO1IsSet(true); } - if (incoming.get(2)) { - struct.o2 = new InvalidOperationException(); + if (incoming.get(1)) { + struct.o2 = new MetaException(); struct.o2.read(iprot); struct.setO2IsSet(true); } - if (incoming.get(3)) { - struct.o3 = new UnknownDBException(); - struct.o3.read(iprot); - struct.setO3IsSet(true); - } } } } - public static class alter_table_args implements org.apache.thrift.TBase<alter_table_args, alter_table_args._Fields>, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("alter_table_args"); + public static class alter_table_with_environment_context_args implements org.apache.thrift.TBase<alter_table_with_environment_context_args, alter_table_with_environment_context_args._Fields>, java.io.Serializable, Cloneable { + private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("alter_table_with_environment_context_args"); private static final org.apache.thrift.protocol.TField DBNAME_FIELD_DESC = new org.apache.thrift.protocol.TField("dbname", org.apache.thrift.protocol.TType.STRING, (short)1); 
private static final org.apache.thrift.protocol.TField TBL_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("tbl_name", org.apache.thrift.protocol.TType.STRING, (short)2); private static final org.apache.thrift.protocol.TField NEW_TBL_FIELD_DESC = new org.apache.thrift.protocol.TField("new_tbl", org.apache.thrift.protocol.TType.STRUCT, (short)3); + private static final org.apache.thrift.protocol.TField ENVIRONMENT_CONTEXT_FIELD_DESC = new org.apache.thrift.protocol.TField("environment_context", org.apache.thrift.protocol.TType.STRUCT, (short)4); private static final Map<Class<? extends IScheme>, SchemeFactory> schemes = new HashMap<Class<? extends IScheme>, SchemeFactory>(); static { - schemes.put(StandardScheme.class, new alter_table_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new alter_table_argsTupleSchemeFactory()); + schemes.put(StandardScheme.class, new alter_table_with_environment_context_argsStandardSchemeFactory()); + schemes.put(TupleScheme.class, new alter_table_with_environment_context_argsTupleSchemeFactory()); } private String dbname; // required private String tbl_name; // required private Table new_tbl; // required + private EnvironmentContext environment_context; // required /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ public enum _Fields implements org.apache.thrift.TFieldIdEnum { DBNAME((short)1, "dbname"), TBL_NAME((short)2, "tbl_name"), - NEW_TBL((short)3, "new_tbl"); + NEW_TBL((short)3, "new_tbl"), + ENVIRONMENT_CONTEXT((short)4, "environment_context"); private static final Map<String, _Fields> byName = new HashMap<String, _Fields>(); @@ -34503,6 +35644,8 @@ public static _Fields findByThriftId(int fieldId) { return TBL_NAME; case 3: // NEW_TBL return NEW_TBL; + case 4: // ENVIRONMENT_CONTEXT + return ENVIRONMENT_CONTEXT; default: return null; } @@ -34552,28 +35695,32 @@ public String getFieldName() { new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); tmpMap.put(_Fields.NEW_TBL, new org.apache.thrift.meta_data.FieldMetaData("new_tbl", org.apache.thrift.TFieldRequirementType.DEFAULT, new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, Table.class))); + tmpMap.put(_Fields.ENVIRONMENT_CONTEXT, new org.apache.thrift.meta_data.FieldMetaData("environment_context", org.apache.thrift.TFieldRequirementType.DEFAULT, + new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, EnvironmentContext.class))); metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(alter_table_args.class, metaDataMap); + org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(alter_table_with_environment_context_args.class, metaDataMap); } - public alter_table_args() { + public alter_table_with_environment_context_args() { } - public alter_table_args( + public alter_table_with_environment_context_args( String dbname, String tbl_name, - Table new_tbl) + Table new_tbl, + EnvironmentContext environment_context) { this(); this.dbname = dbname; this.tbl_name = tbl_name; this.new_tbl = new_tbl; + this.environment_context = environment_context; } /** * Performs a deep copy on other. 
*/ - public alter_table_args(alter_table_args other) { + public alter_table_with_environment_context_args(alter_table_with_environment_context_args other) { if (other.isSetDbname()) { this.dbname = other.dbname; } @@ -34583,10 +35730,13 @@ public alter_table_args(alter_table_args other) { if (other.isSetNew_tbl()) { this.new_tbl = new Table(other.new_tbl); } + if (other.isSetEnvironment_context()) { + this.environment_context = new EnvironmentContext(other.environment_context); + } } - public alter_table_args deepCopy() { - return new alter_table_args(this); + public alter_table_with_environment_context_args deepCopy() { + return new alter_table_with_environment_context_args(this); } @Override @@ -34594,6 +35744,7 @@ public void clear() { this.dbname = null; this.tbl_name = null; this.new_tbl = null; + this.environment_context = null; } public String getDbname() { @@ -34665,6 +35816,29 @@ public void setNew_tblIsSet(boolean value) { } } + public EnvironmentContext getEnvironment_context() { + return this.environment_context; + } + + public void setEnvironment_context(EnvironmentContext environment_context) { + this.environment_context = environment_context; + } + + public void unsetEnvironment_context() { + this.environment_context = null; + } + + /** Returns true if field environment_context is set (has been assigned a value) and false otherwise */ + public boolean isSetEnvironment_context() { + return this.environment_context != null; + } + + public void setEnvironment_contextIsSet(boolean value) { + if (!value) { + this.environment_context = null; + } + } + public void setFieldValue(_Fields field, Object value) { switch (field) { case DBNAME: @@ -34691,6 +35865,14 @@ public void setFieldValue(_Fields field, Object value) { } break; + case ENVIRONMENT_CONTEXT: + if (value == null) { + unsetEnvironment_context(); + } else { + setEnvironment_context((EnvironmentContext)value); + } + break; + } } @@ -34705,6 +35887,9 @@ public Object getFieldValue(_Fields field) { case NEW_TBL: return getNew_tbl(); + case ENVIRONMENT_CONTEXT: + return getEnvironment_context(); + } throw new IllegalStateException(); } @@ -34722,6 +35907,8 @@ public boolean isSet(_Fields field) { return isSetTbl_name(); case NEW_TBL: return isSetNew_tbl(); + case ENVIRONMENT_CONTEXT: + return isSetEnvironment_context(); } throw new IllegalStateException(); } @@ -34730,12 +35917,12 @@ public boolean isSet(_Fields field) { public boolean equals(Object that) { if (that == null) return false; - if (that instanceof alter_table_args) - return this.equals((alter_table_args)that); + if (that instanceof alter_table_with_environment_context_args) + return this.equals((alter_table_with_environment_context_args)that); return false; } - public boolean equals(alter_table_args that) { + public boolean equals(alter_table_with_environment_context_args that) { if (that == null) return false; @@ -34766,6 +35953,15 @@ public boolean equals(alter_table_args that) { return false; } + boolean this_present_environment_context = true && this.isSetEnvironment_context(); + boolean that_present_environment_context = true && that.isSetEnvironment_context(); + if (this_present_environment_context || that_present_environment_context) { + if (!(this_present_environment_context && that_present_environment_context)) + return false; + if (!this.environment_context.equals(that.environment_context)) + return false; + } + return true; } @@ -34788,16 +35984,21 @@ public int hashCode() { if (present_new_tbl) builder.append(new_tbl); + boolean 
present_environment_context = true && (isSetEnvironment_context()); + builder.append(present_environment_context); + if (present_environment_context) + builder.append(environment_context); + return builder.toHashCode(); } - public int compareTo(alter_table_args other) { + public int compareTo(alter_table_with_environment_context_args other) { if (!getClass().equals(other.getClass())) { return getClass().getName().compareTo(other.getClass().getName()); } int lastComparison = 0; - alter_table_args typedOther = (alter_table_args)other; + alter_table_with_environment_context_args typedOther = (alter_table_with_environment_context_args)other; lastComparison = Boolean.valueOf(isSetDbname()).compareTo(typedOther.isSetDbname()); if (lastComparison != 0) { @@ -34829,6 +36030,16 @@ public int compareTo(alter_table_args other) { return lastComparison; } } + lastComparison = Boolean.valueOf(isSetEnvironment_context()).compareTo(typedOther.isSetEnvironment_context()); + if (lastComparison != 0) { + return lastComparison; + } + if (isSetEnvironment_context()) { + lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.environment_context, typedOther.environment_context); + if (lastComparison != 0) { + return lastComparison; + } + } return 0; } @@ -34846,7 +36057,7 @@ public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache. @Override public String toString() { - StringBuilder sb = new StringBuilder("alter_table_args("); + StringBuilder sb = new StringBuilder("alter_table_with_environment_context_args("); boolean first = true; sb.append("dbname:"); @@ -34872,6 +36083,14 @@ public String toString() { sb.append(this.new_tbl); } first = false; + if (!first) sb.append(", "); + sb.append("environment_context:"); + if (this.environment_context == null) { + sb.append("null"); + } else { + sb.append(this.environment_context); + } + first = false; sb.append(")"); return sb.toString(); } @@ -34882,6 +36101,9 @@ public void validate() throws org.apache.thrift.TException { if (new_tbl != null) { new_tbl.validate(); } + if (environment_context != null) { + environment_context.validate(); + } } private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { @@ -34900,15 +36122,15 @@ private void readObject(java.io.ObjectInputStream in) throws java.io.IOException } } - private static class alter_table_argsStandardSchemeFactory implements SchemeFactory { - public alter_table_argsStandardScheme getScheme() { - return new alter_table_argsStandardScheme(); + private static class alter_table_with_environment_context_argsStandardSchemeFactory implements SchemeFactory { + public alter_table_with_environment_context_argsStandardScheme getScheme() { + return new alter_table_with_environment_context_argsStandardScheme(); } } - private static class alter_table_argsStandardScheme extends StandardScheme { + private static class alter_table_with_environment_context_argsStandardScheme extends StandardScheme { - public void read(org.apache.thrift.protocol.TProtocol iprot, alter_table_args struct) throws org.apache.thrift.TException { + public void read(org.apache.thrift.protocol.TProtocol iprot, alter_table_with_environment_context_args struct) throws org.apache.thrift.TException { org.apache.thrift.protocol.TField schemeField; iprot.readStructBegin(); while (true) @@ -34943,6 +36165,15 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, alter_table_args st org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); } break; + case 4: // ENVIRONMENT_CONTEXT + 
if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { + struct.environment_context = new EnvironmentContext(); + struct.environment_context.read(iprot); + struct.setEnvironment_contextIsSet(true); + } else { + org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); + } + break; default: org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); } @@ -34952,7 +36183,7 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, alter_table_args st struct.validate(); } - public void write(org.apache.thrift.protocol.TProtocol oprot, alter_table_args struct) throws org.apache.thrift.TException { + public void write(org.apache.thrift.protocol.TProtocol oprot, alter_table_with_environment_context_args struct) throws org.apache.thrift.TException { struct.validate(); oprot.writeStructBegin(STRUCT_DESC); @@ -34971,22 +36202,27 @@ public void write(org.apache.thrift.protocol.TProtocol oprot, alter_table_args s struct.new_tbl.write(oprot); oprot.writeFieldEnd(); } + if (struct.environment_context != null) { + oprot.writeFieldBegin(ENVIRONMENT_CONTEXT_FIELD_DESC); + struct.environment_context.write(oprot); + oprot.writeFieldEnd(); + } oprot.writeFieldStop(); oprot.writeStructEnd(); } } - private static class alter_table_argsTupleSchemeFactory implements SchemeFactory { - public alter_table_argsTupleScheme getScheme() { - return new alter_table_argsTupleScheme(); + private static class alter_table_with_environment_context_argsTupleSchemeFactory implements SchemeFactory { + public alter_table_with_environment_context_argsTupleScheme getScheme() { + return new alter_table_with_environment_context_argsTupleScheme(); } } - private static class alter_table_argsTupleScheme extends TupleScheme { + private static class alter_table_with_environment_context_argsTupleScheme extends TupleScheme { @Override - public void write(org.apache.thrift.protocol.TProtocol prot, alter_table_args struct) throws org.apache.thrift.TException { + public void write(org.apache.thrift.protocol.TProtocol prot, alter_table_with_environment_context_args struct) throws org.apache.thrift.TException { TTupleProtocol oprot = (TTupleProtocol) prot; BitSet optionals = new BitSet(); if (struct.isSetDbname()) { @@ -34998,7 +36234,10 @@ public void write(org.apache.thrift.protocol.TProtocol prot, alter_table_args st if (struct.isSetNew_tbl()) { optionals.set(2); } - oprot.writeBitSet(optionals, 3); + if (struct.isSetEnvironment_context()) { + optionals.set(3); + } + oprot.writeBitSet(optionals, 4); if (struct.isSetDbname()) { oprot.writeString(struct.dbname); } @@ -35008,12 +36247,15 @@ public void write(org.apache.thrift.protocol.TProtocol prot, alter_table_args st if (struct.isSetNew_tbl()) { struct.new_tbl.write(oprot); } + if (struct.isSetEnvironment_context()) { + struct.environment_context.write(oprot); + } } @Override - public void read(org.apache.thrift.protocol.TProtocol prot, alter_table_args struct) throws org.apache.thrift.TException { + public void read(org.apache.thrift.protocol.TProtocol prot, alter_table_with_environment_context_args struct) throws org.apache.thrift.TException { TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(3); + BitSet incoming = iprot.readBitSet(4); if (incoming.get(0)) { struct.dbname = iprot.readString(); struct.setDbnameIsSet(true); @@ -35027,21 +36269,26 @@ public void read(org.apache.thrift.protocol.TProtocol prot, alter_table_args str struct.new_tbl.read(iprot); struct.setNew_tblIsSet(true); } + if (incoming.get(3)) { + 
struct.environment_context = new EnvironmentContext(); + struct.environment_context.read(iprot); + struct.setEnvironment_contextIsSet(true); + } } } } - public static class alter_table_result implements org.apache.thrift.TBase<alter_table_result, alter_table_result._Fields>, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("alter_table_result"); + public static class alter_table_with_environment_context_result implements org.apache.thrift.TBase<alter_table_with_environment_context_result, alter_table_with_environment_context_result._Fields>, java.io.Serializable, Cloneable { + private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("alter_table_with_environment_context_result"); private static final org.apache.thrift.protocol.TField O1_FIELD_DESC = new org.apache.thrift.protocol.TField("o1", org.apache.thrift.protocol.TType.STRUCT, (short)1); private static final org.apache.thrift.protocol.TField O2_FIELD_DESC = new org.apache.thrift.protocol.TField("o2", org.apache.thrift.protocol.TType.STRUCT, (short)2); private static final Map<Class<? extends IScheme>, SchemeFactory> schemes = new HashMap<Class<? extends IScheme>, SchemeFactory>(); static { - schemes.put(StandardScheme.class, new alter_table_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new alter_table_resultTupleSchemeFactory()); + schemes.put(StandardScheme.class, new alter_table_with_environment_context_resultStandardSchemeFactory()); + schemes.put(TupleScheme.class, new alter_table_with_environment_context_resultTupleSchemeFactory()); } private InvalidOperationException o1; // required @@ -35117,13 +36364,13 @@ public String getFieldName() { tmpMap.put(_Fields.O2, new org.apache.thrift.meta_data.FieldMetaData("o2", org.apache.thrift.TFieldRequirementType.DEFAULT, new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRUCT))); metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(alter_table_result.class, metaDataMap); + org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(alter_table_with_environment_context_result.class, metaDataMap); } - public alter_table_result() { + public alter_table_with_environment_context_result() { } - public alter_table_result( + public alter_table_with_environment_context_result( InvalidOperationException o1, MetaException o2) { @@ -35135,7 +36382,7 @@ public alter_table_result( /** * Performs a deep copy on other. 
*/ - public alter_table_result(alter_table_result other) { + public alter_table_with_environment_context_result(alter_table_with_environment_context_result other) { if (other.isSetO1()) { this.o1 = new InvalidOperationException(other.o1); } @@ -35144,8 +36391,8 @@ public alter_table_result(alter_table_result other) { } } - public alter_table_result deepCopy() { - return new alter_table_result(this); + public alter_table_with_environment_context_result deepCopy() { + return new alter_table_with_environment_context_result(this); } @Override @@ -35252,12 +36499,12 @@ public boolean isSet(_Fields field) { public boolean equals(Object that) { if (that == null) return false; - if (that instanceof alter_table_result) - return this.equals((alter_table_result)that); + if (that instanceof alter_table_with_environment_context_result) + return this.equals((alter_table_with_environment_context_result)that); return false; } - public boolean equals(alter_table_result that) { + public boolean equals(alter_table_with_environment_context_result that) { if (that == null) return false; @@ -35299,13 +36546,13 @@ public int hashCode() { return builder.toHashCode(); } - public int compareTo(alter_table_result other) { + public int compareTo(alter_table_with_environment_context_result other) { if (!getClass().equals(other.getClass())) { return getClass().getName().compareTo(other.getClass().getName()); } int lastComparison = 0; - alter_table_result typedOther = (alter_table_result)other; + alter_table_with_environment_context_result typedOther = (alter_table_with_environment_context_result)other; lastComparison = Boolean.valueOf(isSetO1()).compareTo(typedOther.isSetO1()); if (lastComparison != 0) { @@ -35344,7 +36591,7 @@ public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache. 
@Override public String toString() { - StringBuilder sb = new StringBuilder("alter_table_result("); + StringBuilder sb = new StringBuilder("alter_table_with_environment_context_result("); boolean first = true; sb.append("o1:"); @@ -35387,15 +36634,15 @@ private void readObject(java.io.ObjectInputStream in) throws java.io.IOException } } - private static class alter_table_resultStandardSchemeFactory implements SchemeFactory { - public alter_table_resultStandardScheme getScheme() { - return new alter_table_resultStandardScheme(); + private static class alter_table_with_environment_context_resultStandardSchemeFactory implements SchemeFactory { + public alter_table_with_environment_context_resultStandardScheme getScheme() { + return new alter_table_with_environment_context_resultStandardScheme(); } } - private static class alter_table_resultStandardScheme extends StandardScheme<alter_table_result> { + private static class alter_table_with_environment_context_resultStandardScheme extends StandardScheme<alter_table_with_environment_context_result> { - public void read(org.apache.thrift.protocol.TProtocol iprot, alter_table_result struct) throws org.apache.thrift.TException { + public void read(org.apache.thrift.protocol.TProtocol iprot, alter_table_with_environment_context_result struct) throws org.apache.thrift.TException { org.apache.thrift.protocol.TField schemeField; iprot.readStructBegin(); while (true) @@ -35432,7 +36679,7 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, alter_table_result struct.validate(); } - public void write(org.apache.thrift.protocol.TProtocol oprot, alter_table_result struct) throws org.apache.thrift.TException { + public void write(org.apache.thrift.protocol.TProtocol oprot, alter_table_with_environment_context_result struct) throws org.apache.thrift.TException { struct.validate(); oprot.writeStructBegin(STRUCT_DESC); @@ -35452,16 +36699,16 @@ public void write(org.apache.thrift.protocol.TProtocol oprot, alter_table_result } - private static class alter_table_resultTupleSchemeFactory implements SchemeFactory { - public alter_table_resultTupleScheme getScheme() { - return new alter_table_resultTupleScheme(); + private static class alter_table_with_environment_context_resultTupleSchemeFactory implements SchemeFactory { + public alter_table_with_environment_context_resultTupleScheme getScheme() { + return new alter_table_with_environment_context_resultTupleScheme(); } } - private static class alter_table_resultTupleScheme extends TupleScheme<alter_table_result> { + private static class alter_table_with_environment_context_resultTupleScheme extends TupleScheme<alter_table_with_environment_context_result> { @Override - public void write(org.apache.thrift.protocol.TProtocol prot, alter_table_result struct) throws org.apache.thrift.TException { + public void write(org.apache.thrift.protocol.TProtocol prot, alter_table_with_environment_context_result struct) throws org.apache.thrift.TException { TTupleProtocol oprot = (TTupleProtocol) prot; BitSet optionals = new BitSet(); if (struct.isSetO1()) { @@ -35480,7 +36727,7 @@ public void write(org.apache.thrift.protocol.TProtocol prot, alter_table_result } @Override - public void read(org.apache.thrift.protocol.TProtocol prot, alter_table_result struct) throws org.apache.thrift.TException { + public void read(org.apache.thrift.protocol.TProtocol prot, alter_table_with_environment_context_result struct) throws org.apache.thrift.TException { TTupleProtocol iprot = (TTupleProtocol) prot; BitSet incoming = iprot.readBitSet(2); if (incoming.get(0)) { @@ -35498,31 +36745,31 @@ public void read(org.apache.thrift.protocol.TProtocol prot, 
alter_table_result s } - public static class alter_table_with_environment_context_args implements org.apache.thrift.TBase<alter_table_with_environment_context_args, alter_table_with_environment_context_args._Fields>, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("alter_table_with_environment_context_args"); + public static class alter_table_with_cascade_args implements org.apache.thrift.TBase<alter_table_with_cascade_args, alter_table_with_cascade_args._Fields>, java.io.Serializable, Cloneable { + private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("alter_table_with_cascade_args"); private static final org.apache.thrift.protocol.TField DBNAME_FIELD_DESC = new org.apache.thrift.protocol.TField("dbname", org.apache.thrift.protocol.TType.STRING, (short)1); private static final org.apache.thrift.protocol.TField TBL_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("tbl_name", org.apache.thrift.protocol.TType.STRING, (short)2); private static final org.apache.thrift.protocol.TField NEW_TBL_FIELD_DESC = new org.apache.thrift.protocol.TField("new_tbl", org.apache.thrift.protocol.TType.STRUCT, (short)3); - private static final org.apache.thrift.protocol.TField ENVIRONMENT_CONTEXT_FIELD_DESC = new org.apache.thrift.protocol.TField("environment_context", org.apache.thrift.protocol.TType.STRUCT, (short)4); + private static final org.apache.thrift.protocol.TField CASCADE_FIELD_DESC = new org.apache.thrift.protocol.TField("cascade", org.apache.thrift.protocol.TType.BOOL, (short)4); private static final Map<Class<? extends IScheme>, SchemeFactory> schemes = new HashMap<Class<? extends IScheme>, SchemeFactory>(); static { - schemes.put(StandardScheme.class, new alter_table_with_environment_context_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new alter_table_with_environment_context_argsTupleSchemeFactory()); + schemes.put(StandardScheme.class, new alter_table_with_cascade_argsStandardSchemeFactory()); + schemes.put(TupleScheme.class, new alter_table_with_cascade_argsTupleSchemeFactory()); } private String dbname; // required private String tbl_name; // required private Table new_tbl; // required - private EnvironmentContext environment_context; // required + private boolean cascade; // required /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ public enum _Fields implements org.apache.thrift.TFieldIdEnum { DBNAME((short)1, "dbname"), TBL_NAME((short)2, "tbl_name"), NEW_TBL((short)3, "new_tbl"), - ENVIRONMENT_CONTEXT((short)4, "environment_context"); + CASCADE((short)4, "cascade"); private static final Map<String, _Fields> byName = new HashMap<String, _Fields>(); @@ -35543,8 +36790,8 @@ public static _Fields findByThriftId(int fieldId) { return TBL_NAME; case 3: // NEW_TBL return NEW_TBL; - case 4: // ENVIRONMENT_CONTEXT - return ENVIRONMENT_CONTEXT; + case 4: // CASCADE + return CASCADE; default: return null; } @@ -35585,6 +36832,8 @@ public String getFieldName() { } // isset id assignments + private static final int __CASCADE_ISSET_ID = 0; + private byte __isset_bitfield = 0; public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; static { Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); @@ -35594,32 +36843,34 @@ public String getFieldName() { new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); tmpMap.put(_Fields.NEW_TBL, new org.apache.thrift.meta_data.FieldMetaData("new_tbl", org.apache.thrift.TFieldRequirementType.DEFAULT, new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, Table.class))); - tmpMap.put(_Fields.ENVIRONMENT_CONTEXT, new org.apache.thrift.meta_data.FieldMetaData("environment_context", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, EnvironmentContext.class))); + tmpMap.put(_Fields.CASCADE, new org.apache.thrift.meta_data.FieldMetaData("cascade", org.apache.thrift.TFieldRequirementType.DEFAULT, + new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.BOOL))); metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(alter_table_with_environment_context_args.class, metaDataMap); + org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(alter_table_with_cascade_args.class, metaDataMap); } - public alter_table_with_environment_context_args() { + public alter_table_with_cascade_args() { } - public alter_table_with_environment_context_args( + public alter_table_with_cascade_args( String dbname, String tbl_name, Table new_tbl, - EnvironmentContext environment_context) + boolean cascade) { this(); this.dbname = dbname; this.tbl_name = tbl_name; this.new_tbl = new_tbl; - this.environment_context = environment_context; + this.cascade = cascade; + setCascadeIsSet(true); } /** * Performs a deep copy on other. 
*/ - public alter_table_with_environment_context_args(alter_table_with_environment_context_args other) { + public alter_table_with_cascade_args(alter_table_with_cascade_args other) { + __isset_bitfield = other.__isset_bitfield; if (other.isSetDbname()) { this.dbname = other.dbname; } @@ -35629,13 +36880,11 @@ public alter_table_with_environment_context_args(alter_table_with_environment_co if (other.isSetNew_tbl()) { this.new_tbl = new Table(other.new_tbl); } - if (other.isSetEnvironment_context()) { - this.environment_context = new EnvironmentContext(other.environment_context); - } + this.cascade = other.cascade; } - public alter_table_with_environment_context_args deepCopy() { - return new alter_table_with_environment_context_args(this); + public alter_table_with_cascade_args deepCopy() { + return new alter_table_with_cascade_args(this); } @Override @@ -35643,7 +36892,8 @@ public void clear() { this.dbname = null; this.tbl_name = null; this.new_tbl = null; - this.environment_context = null; + setCascadeIsSet(false); + this.cascade = false; } public String getDbname() { @@ -35715,27 +36965,26 @@ public void setNew_tblIsSet(boolean value) { } } - public EnvironmentContext getEnvironment_context() { - return this.environment_context; + public boolean isCascade() { + return this.cascade; } - public void setEnvironment_context(EnvironmentContext environment_context) { - this.environment_context = environment_context; + public void setCascade(boolean cascade) { + this.cascade = cascade; + setCascadeIsSet(true); } - public void unsetEnvironment_context() { - this.environment_context = null; + public void unsetCascade() { + __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __CASCADE_ISSET_ID); } - /** Returns true if field environment_context is set (has been assigned a value) and false otherwise */ - public boolean isSetEnvironment_context() { - return this.environment_context != null; + /** Returns true if field cascade is set (has been assigned a value) and false otherwise */ + public boolean isSetCascade() { + return EncodingUtils.testBit(__isset_bitfield, __CASCADE_ISSET_ID); } - public void setEnvironment_contextIsSet(boolean value) { - if (!value) { - this.environment_context = null; - } + public void setCascadeIsSet(boolean value) { + __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __CASCADE_ISSET_ID, value); } public void setFieldValue(_Fields field, Object value) { @@ -35764,11 +37013,11 @@ public void setFieldValue(_Fields field, Object value) { } break; - case ENVIRONMENT_CONTEXT: + case CASCADE: if (value == null) { - unsetEnvironment_context(); + unsetCascade(); } else { - setEnvironment_context((EnvironmentContext)value); + setCascade((Boolean)value); } break; @@ -35786,8 +37035,8 @@ public Object getFieldValue(_Fields field) { case NEW_TBL: return getNew_tbl(); - case ENVIRONMENT_CONTEXT: - return getEnvironment_context(); + case CASCADE: + return Boolean.valueOf(isCascade()); } throw new IllegalStateException(); @@ -35806,8 +37055,8 @@ public boolean isSet(_Fields field) { return isSetTbl_name(); case NEW_TBL: return isSetNew_tbl(); - case ENVIRONMENT_CONTEXT: - return isSetEnvironment_context(); + case CASCADE: + return isSetCascade(); } throw new IllegalStateException(); } @@ -35816,12 +37065,12 @@ public boolean isSet(_Fields field) { public boolean equals(Object that) { if (that == null) return false; - if (that instanceof alter_table_with_environment_context_args) - return this.equals((alter_table_with_environment_context_args)that); + if (that instanceof 
alter_table_with_cascade_args) + return this.equals((alter_table_with_cascade_args)that); return false; } - public boolean equals(alter_table_with_environment_context_args that) { + public boolean equals(alter_table_with_cascade_args that) { if (that == null) return false; @@ -35852,12 +37101,12 @@ public boolean equals(alter_table_with_environment_context_args that) { return false; } - boolean this_present_environment_context = true && this.isSetEnvironment_context(); - boolean that_present_environment_context = true && that.isSetEnvironment_context(); - if (this_present_environment_context || that_present_environment_context) { - if (!(this_present_environment_context && that_present_environment_context)) + boolean this_present_cascade = true; + boolean that_present_cascade = true; + if (this_present_cascade || that_present_cascade) { + if (!(this_present_cascade && that_present_cascade)) return false; - if (!this.environment_context.equals(that.environment_context)) + if (this.cascade != that.cascade) return false; } @@ -35883,21 +37132,21 @@ public int hashCode() { if (present_new_tbl) builder.append(new_tbl); - boolean present_environment_context = true && (isSetEnvironment_context()); - builder.append(present_environment_context); - if (present_environment_context) - builder.append(environment_context); + boolean present_cascade = true; + builder.append(present_cascade); + if (present_cascade) + builder.append(cascade); return builder.toHashCode(); } - public int compareTo(alter_table_with_environment_context_args other) { + public int compareTo(alter_table_with_cascade_args other) { if (!getClass().equals(other.getClass())) { return getClass().getName().compareTo(other.getClass().getName()); } int lastComparison = 0; - alter_table_with_environment_context_args typedOther = (alter_table_with_environment_context_args)other; + alter_table_with_cascade_args typedOther = (alter_table_with_cascade_args)other; lastComparison = Boolean.valueOf(isSetDbname()).compareTo(typedOther.isSetDbname()); if (lastComparison != 0) { @@ -35929,12 +37178,12 @@ public int compareTo(alter_table_with_environment_context_args other) { return lastComparison; } } - lastComparison = Boolean.valueOf(isSetEnvironment_context()).compareTo(typedOther.isSetEnvironment_context()); + lastComparison = Boolean.valueOf(isSetCascade()).compareTo(typedOther.isSetCascade()); if (lastComparison != 0) { return lastComparison; } - if (isSetEnvironment_context()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.environment_context, typedOther.environment_context); + if (isSetCascade()) { + lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.cascade, typedOther.cascade); if (lastComparison != 0) { return lastComparison; } @@ -35956,7 +37205,7 @@ public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache. 
@Override public String toString() { - StringBuilder sb = new StringBuilder("alter_table_with_environment_context_args("); + StringBuilder sb = new StringBuilder("alter_table_with_cascade_args("); boolean first = true; sb.append("dbname:"); @@ -35983,12 +37232,8 @@ public String toString() { } first = false; if (!first) sb.append(", "); - sb.append("environment_context:"); - if (this.environment_context == null) { - sb.append("null"); - } else { - sb.append(this.environment_context); - } + sb.append("cascade:"); + sb.append(this.cascade); first = false; sb.append(")"); return sb.toString(); @@ -36000,9 +37245,6 @@ public void validate() throws org.apache.thrift.TException { if (new_tbl != null) { new_tbl.validate(); } - if (environment_context != null) { - environment_context.validate(); - } } private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { @@ -36015,21 +37257,23 @@ private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOExcept private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { try { + // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. + __isset_bitfield = 0; read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); } catch (org.apache.thrift.TException te) { throw new java.io.IOException(te); } } - private static class alter_table_with_environment_context_argsStandardSchemeFactory implements SchemeFactory { - public alter_table_with_environment_context_argsStandardScheme getScheme() { - return new alter_table_with_environment_context_argsStandardScheme(); + private static class alter_table_with_cascade_argsStandardSchemeFactory implements SchemeFactory { + public alter_table_with_cascade_argsStandardScheme getScheme() { + return new alter_table_with_cascade_argsStandardScheme(); } } - private static class alter_table_with_environment_context_argsStandardScheme extends StandardScheme<alter_table_with_environment_context_args> { + private static class alter_table_with_cascade_argsStandardScheme extends StandardScheme<alter_table_with_cascade_args> { - public void read(org.apache.thrift.protocol.TProtocol iprot, alter_table_with_environment_context_args struct) throws org.apache.thrift.TException { + public void read(org.apache.thrift.protocol.TProtocol iprot, alter_table_with_cascade_args struct) throws org.apache.thrift.TException { org.apache.thrift.protocol.TField schemeField; iprot.readStructBegin(); while (true) @@ -36064,11 +37308,10 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, alter_table_with_en org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); } break; - case 4: // ENVIRONMENT_CONTEXT - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.environment_context = new EnvironmentContext(); - struct.environment_context.read(iprot); - struct.setEnvironment_contextIsSet(true); + case 4: // CASCADE + if (schemeField.type == org.apache.thrift.protocol.TType.BOOL) { + struct.cascade = iprot.readBool(); + struct.setCascadeIsSet(true); } else { org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); } @@ -36082,7 +37325,7 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, alter_table_with_en struct.validate(); } - public void write(org.apache.thrift.protocol.TProtocol oprot, alter_table_with_environment_context_args struct) throws org.apache.thrift.TException { + public void write(org.apache.thrift.protocol.TProtocol oprot, 
alter_table_with_cascade_args struct) throws org.apache.thrift.TException { struct.validate(); oprot.writeStructBegin(STRUCT_DESC); @@ -36101,27 +37344,25 @@ public void write(org.apache.thrift.protocol.TProtocol oprot, alter_table_with_e struct.new_tbl.write(oprot); oprot.writeFieldEnd(); } - if (struct.environment_context != null) { - oprot.writeFieldBegin(ENVIRONMENT_CONTEXT_FIELD_DESC); - struct.environment_context.write(oprot); - oprot.writeFieldEnd(); - } + oprot.writeFieldBegin(CASCADE_FIELD_DESC); + oprot.writeBool(struct.cascade); + oprot.writeFieldEnd(); oprot.writeFieldStop(); oprot.writeStructEnd(); } } - private static class alter_table_with_environment_context_argsTupleSchemeFactory implements SchemeFactory { - public alter_table_with_environment_context_argsTupleScheme getScheme() { - return new alter_table_with_environment_context_argsTupleScheme(); + private static class alter_table_with_cascade_argsTupleSchemeFactory implements SchemeFactory { + public alter_table_with_cascade_argsTupleScheme getScheme() { + return new alter_table_with_cascade_argsTupleScheme(); } } - private static class alter_table_with_environment_context_argsTupleScheme extends TupleScheme<alter_table_with_environment_context_args> { + private static class alter_table_with_cascade_argsTupleScheme extends TupleScheme<alter_table_with_cascade_args> { @Override - public void write(org.apache.thrift.protocol.TProtocol prot, alter_table_with_environment_context_args struct) throws org.apache.thrift.TException { + public void write(org.apache.thrift.protocol.TProtocol prot, alter_table_with_cascade_args struct) throws org.apache.thrift.TException { TTupleProtocol oprot = (TTupleProtocol) prot; BitSet optionals = new BitSet(); if (struct.isSetDbname()) { @@ -36133,7 +37374,7 @@ public void write(org.apache.thrift.protocol.TProtocol prot, alter_table_with_en if (struct.isSetNew_tbl()) { optionals.set(2); } - if (struct.isSetEnvironment_context()) { + if (struct.isSetCascade()) { optionals.set(3); } oprot.writeBitSet(optionals, 4); @@ -36146,13 +37387,13 @@ public void write(org.apache.thrift.protocol.TProtocol prot, alter_table_with_en if (struct.isSetNew_tbl()) { struct.new_tbl.write(oprot); } - if (struct.isSetEnvironment_context()) { - struct.environment_context.write(oprot); + if (struct.isSetCascade()) { + oprot.writeBool(struct.cascade); } } @Override - public void read(org.apache.thrift.protocol.TProtocol prot, alter_table_with_environment_context_args struct) throws org.apache.thrift.TException { + public void read(org.apache.thrift.protocol.TProtocol prot, alter_table_with_cascade_args struct) throws org.apache.thrift.TException { TTupleProtocol iprot = (TTupleProtocol) prot; BitSet incoming = iprot.readBitSet(4); if (incoming.get(0)) { @@ -36169,25 +37410,24 @@ public void read(org.apache.thrift.protocol.TProtocol prot, alter_table_with_env struct.setNew_tblIsSet(true); } if (incoming.get(3)) { - struct.environment_context = new EnvironmentContext(); - struct.environment_context.read(iprot); - struct.setEnvironment_contextIsSet(true); + struct.cascade = iprot.readBool(); + struct.setCascadeIsSet(true); } } } } - public static class alter_table_with_environment_context_result implements org.apache.thrift.TBase<alter_table_with_environment_context_result, alter_table_with_environment_context_result._Fields>, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("alter_table_with_environment_context_result"); + public static class alter_table_with_cascade_result implements org.apache.thrift.TBase<alter_table_with_cascade_result, alter_table_with_cascade_result._Fields>, java.io.Serializable, Cloneable { + private static final 
org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("alter_table_with_cascade_result"); private static final org.apache.thrift.protocol.TField O1_FIELD_DESC = new org.apache.thrift.protocol.TField("o1", org.apache.thrift.protocol.TType.STRUCT, (short)1); private static final org.apache.thrift.protocol.TField O2_FIELD_DESC = new org.apache.thrift.protocol.TField("o2", org.apache.thrift.protocol.TType.STRUCT, (short)2); private static final Map<Class<? extends IScheme>, SchemeFactory> schemes = new HashMap<Class<? extends IScheme>, SchemeFactory>(); static { - schemes.put(StandardScheme.class, new alter_table_with_environment_context_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new alter_table_with_environment_context_resultTupleSchemeFactory()); + schemes.put(StandardScheme.class, new alter_table_with_cascade_resultStandardSchemeFactory()); + schemes.put(TupleScheme.class, new alter_table_with_cascade_resultTupleSchemeFactory()); } private InvalidOperationException o1; // required @@ -36263,13 +37503,13 @@ public String getFieldName() { tmpMap.put(_Fields.O2, new org.apache.thrift.meta_data.FieldMetaData("o2", org.apache.thrift.TFieldRequirementType.DEFAULT, new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRUCT))); metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(alter_table_with_environment_context_result.class, metaDataMap); + org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(alter_table_with_cascade_result.class, metaDataMap); } - public alter_table_with_environment_context_result() { + public alter_table_with_cascade_result() { } - public alter_table_with_environment_context_result( + public alter_table_with_cascade_result( InvalidOperationException o1, MetaException o2) { @@ -36281,7 +37521,7 @@ public alter_table_with_environment_context_result( /** * Performs a deep copy on other. 
*/ - public alter_table_with_environment_context_result(alter_table_with_environment_context_result other) { + public alter_table_with_cascade_result(alter_table_with_cascade_result other) { if (other.isSetO1()) { this.o1 = new InvalidOperationException(other.o1); } @@ -36290,8 +37530,8 @@ public alter_table_with_environment_context_result(alter_table_with_environment_ } } - public alter_table_with_environment_context_result deepCopy() { - return new alter_table_with_environment_context_result(this); + public alter_table_with_cascade_result deepCopy() { + return new alter_table_with_cascade_result(this); } @Override @@ -36398,12 +37638,12 @@ public boolean isSet(_Fields field) { public boolean equals(Object that) { if (that == null) return false; - if (that instanceof alter_table_with_environment_context_result) - return this.equals((alter_table_with_environment_context_result)that); + if (that instanceof alter_table_with_cascade_result) + return this.equals((alter_table_with_cascade_result)that); return false; } - public boolean equals(alter_table_with_environment_context_result that) { + public boolean equals(alter_table_with_cascade_result that) { if (that == null) return false; @@ -36445,13 +37685,13 @@ public int hashCode() { return builder.toHashCode(); } - public int compareTo(alter_table_with_environment_context_result other) { + public int compareTo(alter_table_with_cascade_result other) { if (!getClass().equals(other.getClass())) { return getClass().getName().compareTo(other.getClass().getName()); } int lastComparison = 0; - alter_table_with_environment_context_result typedOther = (alter_table_with_environment_context_result)other; + alter_table_with_cascade_result typedOther = (alter_table_with_cascade_result)other; lastComparison = Boolean.valueOf(isSetO1()).compareTo(typedOther.isSetO1()); if (lastComparison != 0) { @@ -36490,7 +37730,7 @@ public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache. 
@Override public String toString() { - StringBuilder sb = new StringBuilder("alter_table_with_environment_context_result("); + StringBuilder sb = new StringBuilder("alter_table_with_cascade_result("); boolean first = true; sb.append("o1:"); @@ -36533,15 +37773,15 @@ private void readObject(java.io.ObjectInputStream in) throws java.io.IOException } } - private static class alter_table_with_environment_context_resultStandardSchemeFactory implements SchemeFactory { - public alter_table_with_environment_context_resultStandardScheme getScheme() { - return new alter_table_with_environment_context_resultStandardScheme(); + private static class alter_table_with_cascade_resultStandardSchemeFactory implements SchemeFactory { + public alter_table_with_cascade_resultStandardScheme getScheme() { + return new alter_table_with_cascade_resultStandardScheme(); } } - private static class alter_table_with_environment_context_resultStandardScheme extends StandardScheme<alter_table_with_environment_context_result> { + private static class alter_table_with_cascade_resultStandardScheme extends StandardScheme<alter_table_with_cascade_result> { - public void read(org.apache.thrift.protocol.TProtocol iprot, alter_table_with_environment_context_result struct) throws org.apache.thrift.TException { + public void read(org.apache.thrift.protocol.TProtocol iprot, alter_table_with_cascade_result struct) throws org.apache.thrift.TException { org.apache.thrift.protocol.TField schemeField; iprot.readStructBegin(); while (true) @@ -36578,7 +37818,7 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, alter_table_with_en struct.validate(); } - public void write(org.apache.thrift.protocol.TProtocol oprot, alter_table_with_environment_context_result struct) throws org.apache.thrift.TException { + public void write(org.apache.thrift.protocol.TProtocol oprot, alter_table_with_cascade_result struct) throws org.apache.thrift.TException { struct.validate(); oprot.writeStructBegin(STRUCT_DESC); @@ -36598,16 +37838,16 @@ public void write(org.apache.thrift.protocol.TProtocol oprot, alter_table_with_e } - private static class alter_table_with_environment_context_resultTupleSchemeFactory implements SchemeFactory { - public alter_table_with_environment_context_resultTupleScheme getScheme() { - return new alter_table_with_environment_context_resultTupleScheme(); + private static class alter_table_with_cascade_resultTupleSchemeFactory implements SchemeFactory { + public alter_table_with_cascade_resultTupleScheme getScheme() { + return new alter_table_with_cascade_resultTupleScheme(); } } - private static class alter_table_with_environment_context_resultTupleScheme extends TupleScheme<alter_table_with_environment_context_result> { + private static class alter_table_with_cascade_resultTupleScheme extends TupleScheme<alter_table_with_cascade_result> { @Override - public void write(org.apache.thrift.protocol.TProtocol prot, alter_table_with_environment_context_result struct) throws org.apache.thrift.TException { + public void write(org.apache.thrift.protocol.TProtocol prot, alter_table_with_cascade_result struct) throws org.apache.thrift.TException { TTupleProtocol oprot = (TTupleProtocol) prot; BitSet optionals = new BitSet(); if (struct.isSetO1()) { @@ -36626,7 +37866,7 @@ public void write(org.apache.thrift.protocol.TProtocol prot, alter_table_with_en } @Override - public void read(org.apache.thrift.protocol.TProtocol prot, alter_table_with_environment_context_result struct) throws org.apache.thrift.TException { + public void read(org.apache.thrift.protocol.TProtocol prot, alter_table_with_cascade_result struct) throws org.apache.thrift.TException { TTupleProtocol iprot = 
(TTupleProtocol) prot; BitSet incoming = iprot.readBitSet(2); if (incoming.get(0)) { diff --git a/metastore/src/gen/thrift/gen-php/metastore/ThriftHiveMetastore.php b/metastore/src/gen/thrift/gen-php/metastore/ThriftHiveMetastore.php index 69285ca..69978d3 100644 --- a/metastore/src/gen/thrift/gen-php/metastore/ThriftHiveMetastore.php +++ b/metastore/src/gen/thrift/gen-php/metastore/ThriftHiveMetastore.php @@ -41,6 +41,7 @@ interface ThriftHiveMetastoreIf extends \FacebookServiceIf { public function get_table_names_by_filter($dbname, $filter, $max_tables); public function alter_table($dbname, $tbl_name, \metastore\Table $new_tbl); public function alter_table_with_environment_context($dbname, $tbl_name, \metastore\Table $new_tbl, \metastore\EnvironmentContext $environment_context); + public function alter_table_with_cascade($dbname, $tbl_name, \metastore\Table $new_tbl, $cascade); public function add_partition(\metastore\Partition $new_part); public function add_partition_with_environment_context(\metastore\Partition $new_part, \metastore\EnvironmentContext $environment_context); public function add_partitions($new_parts); @@ -1567,6 +1568,63 @@ class ThriftHiveMetastoreClient extends \FacebookServiceClient implements \metas return; } + public function alter_table_with_cascade($dbname, $tbl_name, \metastore\Table $new_tbl, $cascade) + { + $this->send_alter_table_with_cascade($dbname, $tbl_name, $new_tbl, $cascade); + $this->recv_alter_table_with_cascade(); + } + + public function send_alter_table_with_cascade($dbname, $tbl_name, \metastore\Table $new_tbl, $cascade) + { + $args = new \metastore\ThriftHiveMetastore_alter_table_with_cascade_args(); + $args->dbname = $dbname; + $args->tbl_name = $tbl_name; + $args->new_tbl = $new_tbl; + $args->cascade = $cascade; + $bin_accel = ($this->output_ instanceof TProtocol::$TBINARYPROTOCOLACCELERATED) && function_exists('thrift_protocol_write_binary'); + if ($bin_accel) + { + thrift_protocol_write_binary($this->output_, 'alter_table_with_cascade', TMessageType::CALL, $args, $this->seqid_, $this->output_->isStrictWrite()); + } + else + { + $this->output_->writeMessageBegin('alter_table_with_cascade', TMessageType::CALL, $this->seqid_); + $args->write($this->output_); + $this->output_->writeMessageEnd(); + $this->output_->getTransport()->flush(); + } + } + + public function recv_alter_table_with_cascade() + { + $bin_accel = ($this->input_ instanceof TProtocol::$TBINARYPROTOCOLACCELERATED) && function_exists('thrift_protocol_read_binary'); + if ($bin_accel) $result = thrift_protocol_read_binary($this->input_, '\metastore\ThriftHiveMetastore_alter_table_with_cascade_result', $this->input_->isStrictRead()); + else + { + $rseqid = 0; + $fname = null; + $mtype = 0; + + $this->input_->readMessageBegin($fname, $mtype, $rseqid); + if ($mtype == TMessageType::EXCEPTION) { + $x = new TApplicationException(); + $x->read($this->input_); + $this->input_->readMessageEnd(); + throw $x; + } + $result = new \metastore\ThriftHiveMetastore_alter_table_with_cascade_result(); + $result->read($this->input_); + $this->input_->readMessageEnd(); + } + if ($result->o1 !== null) { + throw $result->o1; + } + if ($result->o2 !== null) { + throw $result->o2; + } + return; + } + public function add_partition(\metastore\Partition $new_part) { $this->send_add_partition($new_part); @@ -12177,6 +12235,239 @@ class ThriftHiveMetastore_alter_table_with_environment_context_result { } +class ThriftHiveMetastore_alter_table_with_cascade_args { + static $_TSPEC; + + public $dbname = null; + 
public $tbl_name = null; + public $new_tbl = null; + public $cascade = null; + + public function __construct($vals=null) { + if (!isset(self::$_TSPEC)) { + self::$_TSPEC = array( + 1 => array( + 'var' => 'dbname', + 'type' => TType::STRING, + ), + 2 => array( + 'var' => 'tbl_name', + 'type' => TType::STRING, + ), + 3 => array( + 'var' => 'new_tbl', + 'type' => TType::STRUCT, + 'class' => '\metastore\Table', + ), + 4 => array( + 'var' => 'cascade', + 'type' => TType::BOOL, + ), + ); + } + if (is_array($vals)) { + if (isset($vals['dbname'])) { + $this->dbname = $vals['dbname']; + } + if (isset($vals['tbl_name'])) { + $this->tbl_name = $vals['tbl_name']; + } + if (isset($vals['new_tbl'])) { + $this->new_tbl = $vals['new_tbl']; + } + if (isset($vals['cascade'])) { + $this->cascade = $vals['cascade']; + } + } + } + + public function getName() { + return 'ThriftHiveMetastore_alter_table_with_cascade_args'; + } + + public function read($input) + { + $xfer = 0; + $fname = null; + $ftype = 0; + $fid = 0; + $xfer += $input->readStructBegin($fname); + while (true) + { + $xfer += $input->readFieldBegin($fname, $ftype, $fid); + if ($ftype == TType::STOP) { + break; + } + switch ($fid) + { + case 1: + if ($ftype == TType::STRING) { + $xfer += $input->readString($this->dbname); + } else { + $xfer += $input->skip($ftype); + } + break; + case 2: + if ($ftype == TType::STRING) { + $xfer += $input->readString($this->tbl_name); + } else { + $xfer += $input->skip($ftype); + } + break; + case 3: + if ($ftype == TType::STRUCT) { + $this->new_tbl = new \metastore\Table(); + $xfer += $this->new_tbl->read($input); + } else { + $xfer += $input->skip($ftype); + } + break; + case 4: + if ($ftype == TType::BOOL) { + $xfer += $input->readBool($this->cascade); + } else { + $xfer += $input->skip($ftype); + } + break; + default: + $xfer += $input->skip($ftype); + break; + } + $xfer += $input->readFieldEnd(); + } + $xfer += $input->readStructEnd(); + return $xfer; + } + + public function write($output) { + $xfer = 0; + $xfer += $output->writeStructBegin('ThriftHiveMetastore_alter_table_with_cascade_args'); + if ($this->dbname !== null) { + $xfer += $output->writeFieldBegin('dbname', TType::STRING, 1); + $xfer += $output->writeString($this->dbname); + $xfer += $output->writeFieldEnd(); + } + if ($this->tbl_name !== null) { + $xfer += $output->writeFieldBegin('tbl_name', TType::STRING, 2); + $xfer += $output->writeString($this->tbl_name); + $xfer += $output->writeFieldEnd(); + } + if ($this->new_tbl !== null) { + if (!is_object($this->new_tbl)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('new_tbl', TType::STRUCT, 3); + $xfer += $this->new_tbl->write($output); + $xfer += $output->writeFieldEnd(); + } + if ($this->cascade !== null) { + $xfer += $output->writeFieldBegin('cascade', TType::BOOL, 4); + $xfer += $output->writeBool($this->cascade); + $xfer += $output->writeFieldEnd(); + } + $xfer += $output->writeFieldStop(); + $xfer += $output->writeStructEnd(); + return $xfer; + } + +} + +class ThriftHiveMetastore_alter_table_with_cascade_result { + static $_TSPEC; + + public $o1 = null; + public $o2 = null; + + public function __construct($vals=null) { + if (!isset(self::$_TSPEC)) { + self::$_TSPEC = array( + 1 => array( + 'var' => 'o1', + 'type' => TType::STRUCT, + 'class' => '\metastore\InvalidOperationException', + ), + 2 => array( + 'var' => 'o2', + 'type' => TType::STRUCT, + 'class' => '\metastore\MetaException', + ), + ); + } + if 
(is_array($vals)) { + if (isset($vals['o1'])) { + $this->o1 = $vals['o1']; + } + if (isset($vals['o2'])) { + $this->o2 = $vals['o2']; + } + } + } + + public function getName() { + return 'ThriftHiveMetastore_alter_table_with_cascade_result'; + } + + public function read($input) + { + $xfer = 0; + $fname = null; + $ftype = 0; + $fid = 0; + $xfer += $input->readStructBegin($fname); + while (true) + { + $xfer += $input->readFieldBegin($fname, $ftype, $fid); + if ($ftype == TType::STOP) { + break; + } + switch ($fid) + { + case 1: + if ($ftype == TType::STRUCT) { + $this->o1 = new \metastore\InvalidOperationException(); + $xfer += $this->o1->read($input); + } else { + $xfer += $input->skip($ftype); + } + break; + case 2: + if ($ftype == TType::STRUCT) { + $this->o2 = new \metastore\MetaException(); + $xfer += $this->o2->read($input); + } else { + $xfer += $input->skip($ftype); + } + break; + default: + $xfer += $input->skip($ftype); + break; + } + $xfer += $input->readFieldEnd(); + } + $xfer += $input->readStructEnd(); + return $xfer; + } + + public function write($output) { + $xfer = 0; + $xfer += $output->writeStructBegin('ThriftHiveMetastore_alter_table_with_cascade_result'); + if ($this->o1 !== null) { + $xfer += $output->writeFieldBegin('o1', TType::STRUCT, 1); + $xfer += $this->o1->write($output); + $xfer += $output->writeFieldEnd(); + } + if ($this->o2 !== null) { + $xfer += $output->writeFieldBegin('o2', TType::STRUCT, 2); + $xfer += $this->o2->write($output); + $xfer += $output->writeFieldEnd(); + } + $xfer += $output->writeFieldStop(); + $xfer += $output->writeStructEnd(); + return $xfer; + } + +} + class ThriftHiveMetastore_add_partition_args { static $_TSPEC; diff --git a/metastore/src/gen/thrift/gen-py/hive_metastore/ThriftHiveMetastore-remote b/metastore/src/gen/thrift/gen-py/hive_metastore/ThriftHiveMetastore-remote index 068895e..59c0393 100644 --- a/metastore/src/gen/thrift/gen-py/hive_metastore/ThriftHiveMetastore-remote +++ b/metastore/src/gen/thrift/gen-py/hive_metastore/ThriftHiveMetastore-remote @@ -48,6 +48,7 @@ if len(sys.argv) <= 1 or sys.argv[1] == '--help': print ' get_table_names_by_filter(string dbname, string filter, i16 max_tables)' print ' void alter_table(string dbname, string tbl_name, Table new_tbl)' print ' void alter_table_with_environment_context(string dbname, string tbl_name, Table new_tbl, EnvironmentContext environment_context)' + print ' void alter_table_with_cascade(string dbname, string tbl_name, Table new_tbl, bool cascade)' print ' Partition add_partition(Partition new_part)' print ' Partition add_partition_with_environment_context(Partition new_part, EnvironmentContext environment_context)' print ' i32 add_partitions( new_parts)' @@ -340,6 +341,12 @@ elif cmd == 'alter_table_with_environment_context': sys.exit(1) pp.pprint(client.alter_table_with_environment_context(args[0],args[1],eval(args[2]),eval(args[3]),)) +elif cmd == 'alter_table_with_cascade': + if len(args) != 4: + print 'alter_table_with_cascade requires 4 args' + sys.exit(1) + pp.pprint(client.alter_table_with_cascade(args[0],args[1],eval(args[2]),eval(args[3]),)) + elif cmd == 'add_partition': if len(args) != 1: print 'add_partition requires 1 args' diff --git a/metastore/src/gen/thrift/gen-py/hive_metastore/ThriftHiveMetastore.py b/metastore/src/gen/thrift/gen-py/hive_metastore/ThriftHiveMetastore.py index 6160b7e..100a7cb 100644 --- a/metastore/src/gen/thrift/gen-py/hive_metastore/ThriftHiveMetastore.py +++ b/metastore/src/gen/thrift/gen-py/hive_metastore/ThriftHiveMetastore.py @@ 
-215,6 +215,16 @@ def alter_table_with_environment_context(self, dbname, tbl_name, new_tbl, enviro """ pass + def alter_table_with_cascade(self, dbname, tbl_name, new_tbl, cascade): + """ + Parameters: + - dbname + - tbl_name + - new_tbl + - cascade + """ + pass + def add_partition(self, new_part): """ Parameters: @@ -1851,6 +1861,44 @@ def recv_alter_table_with_environment_context(self, ): raise result.o2 return + def alter_table_with_cascade(self, dbname, tbl_name, new_tbl, cascade): + """ + Parameters: + - dbname + - tbl_name + - new_tbl + - cascade + """ + self.send_alter_table_with_cascade(dbname, tbl_name, new_tbl, cascade) + self.recv_alter_table_with_cascade() + + def send_alter_table_with_cascade(self, dbname, tbl_name, new_tbl, cascade): + self._oprot.writeMessageBegin('alter_table_with_cascade', TMessageType.CALL, self._seqid) + args = alter_table_with_cascade_args() + args.dbname = dbname + args.tbl_name = tbl_name + args.new_tbl = new_tbl + args.cascade = cascade + args.write(self._oprot) + self._oprot.writeMessageEnd() + self._oprot.trans.flush() + + def recv_alter_table_with_cascade(self, ): + (fname, mtype, rseqid) = self._iprot.readMessageBegin() + if mtype == TMessageType.EXCEPTION: + x = TApplicationException() + x.read(self._iprot) + self._iprot.readMessageEnd() + raise x + result = alter_table_with_cascade_result() + result.read(self._iprot) + self._iprot.readMessageEnd() + if result.o1 is not None: + raise result.o1 + if result.o2 is not None: + raise result.o2 + return + def add_partition(self, new_part): """ Parameters: @@ -5177,6 +5225,7 @@ def __init__(self, handler): self._processMap["get_table_names_by_filter"] = Processor.process_get_table_names_by_filter self._processMap["alter_table"] = Processor.process_alter_table self._processMap["alter_table_with_environment_context"] = Processor.process_alter_table_with_environment_context + self._processMap["alter_table_with_cascade"] = Processor.process_alter_table_with_cascade self._processMap["add_partition"] = Processor.process_add_partition self._processMap["add_partition_with_environment_context"] = Processor.process_add_partition_with_environment_context self._processMap["add_partitions"] = Processor.process_add_partitions @@ -5692,6 +5741,22 @@ def process_alter_table_with_environment_context(self, seqid, iprot, oprot): oprot.writeMessageEnd() oprot.trans.flush() + def process_alter_table_with_cascade(self, seqid, iprot, oprot): + args = alter_table_with_cascade_args() + args.read(iprot) + iprot.readMessageEnd() + result = alter_table_with_cascade_result() + try: + self._handler.alter_table_with_cascade(args.dbname, args.tbl_name, args.new_tbl, args.cascade) + except InvalidOperationException as o1: + result.o1 = o1 + except MetaException as o2: + result.o2 = o2 + oprot.writeMessageBegin("alter_table_with_cascade", TMessageType.REPLY, seqid) + result.write(oprot) + oprot.writeMessageEnd() + oprot.trans.flush() + def process_add_partition(self, seqid, iprot, oprot): args = add_partition_args() args.read(iprot) @@ -11052,6 +11117,177 @@ def __eq__(self, other): def __ne__(self, other): return not (self == other) +class alter_table_with_cascade_args: + """ + Attributes: + - dbname + - tbl_name + - new_tbl + - cascade + """ + + thrift_spec = ( + None, # 0 + (1, TType.STRING, 'dbname', None, None, ), # 1 + (2, TType.STRING, 'tbl_name', None, None, ), # 2 + (3, TType.STRUCT, 'new_tbl', (Table, Table.thrift_spec), None, ), # 3 + (4, TType.BOOL, 'cascade', None, None, ), # 4 + ) + + def __init__(self, dbname=None, 
tbl_name=None, new_tbl=None, cascade=None,): + self.dbname = dbname + self.tbl_name = tbl_name + self.new_tbl = new_tbl + self.cascade = cascade + + def read(self, iprot): + if iprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None and fastbinary is not None: + fastbinary.decode_binary(self, iprot.trans, (self.__class__, self.thrift_spec)) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == TType.STOP: + break + if fid == 1: + if ftype == TType.STRING: + self.dbname = iprot.readString(); + else: + iprot.skip(ftype) + elif fid == 2: + if ftype == TType.STRING: + self.tbl_name = iprot.readString(); + else: + iprot.skip(ftype) + elif fid == 3: + if ftype == TType.STRUCT: + self.new_tbl = Table() + self.new_tbl.read(iprot) + else: + iprot.skip(ftype) + elif fid == 4: + if ftype == TType.BOOL: + self.cascade = iprot.readBool(); + else: + iprot.skip(ftype) + else: + iprot.skip(ftype) + iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if oprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and self.thrift_spec is not None and fastbinary is not None: + oprot.trans.write(fastbinary.encode_binary(self, (self.__class__, self.thrift_spec))) + return + oprot.writeStructBegin('alter_table_with_cascade_args') + if self.dbname is not None: + oprot.writeFieldBegin('dbname', TType.STRING, 1) + oprot.writeString(self.dbname) + oprot.writeFieldEnd() + if self.tbl_name is not None: + oprot.writeFieldBegin('tbl_name', TType.STRING, 2) + oprot.writeString(self.tbl_name) + oprot.writeFieldEnd() + if self.new_tbl is not None: + oprot.writeFieldBegin('new_tbl', TType.STRUCT, 3) + self.new_tbl.write(oprot) + oprot.writeFieldEnd() + if self.cascade is not None: + oprot.writeFieldBegin('cascade', TType.BOOL, 4) + oprot.writeBool(self.cascade) + oprot.writeFieldEnd() + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + return + + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.iteritems()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) + +class alter_table_with_cascade_result: + """ + Attributes: + - o1 + - o2 + """ + + thrift_spec = ( + None, # 0 + (1, TType.STRUCT, 'o1', (InvalidOperationException, InvalidOperationException.thrift_spec), None, ), # 1 + (2, TType.STRUCT, 'o2', (MetaException, MetaException.thrift_spec), None, ), # 2 + ) + + def __init__(self, o1=None, o2=None,): + self.o1 = o1 + self.o2 = o2 + + def read(self, iprot): + if iprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None and fastbinary is not None: + fastbinary.decode_binary(self, iprot.trans, (self.__class__, self.thrift_spec)) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == TType.STOP: + break + if fid == 1: + if ftype == TType.STRUCT: + self.o1 = InvalidOperationException() + self.o1.read(iprot) + else: + iprot.skip(ftype) + elif fid == 2: + if ftype == TType.STRUCT: + self.o2 = MetaException() + self.o2.read(iprot) + else: + iprot.skip(ftype) + else: + iprot.skip(ftype) + iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if 
oprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and self.thrift_spec is not None and fastbinary is not None: + oprot.trans.write(fastbinary.encode_binary(self, (self.__class__, self.thrift_spec))) + return + oprot.writeStructBegin('alter_table_with_cascade_result') + if self.o1 is not None: + oprot.writeFieldBegin('o1', TType.STRUCT, 1) + self.o1.write(oprot) + oprot.writeFieldEnd() + if self.o2 is not None: + oprot.writeFieldBegin('o2', TType.STRUCT, 2) + self.o2.write(oprot) + oprot.writeFieldEnd() + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + return + + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.iteritems()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) + class add_partition_args: """ Attributes: diff --git a/metastore/src/gen/thrift/gen-rb/thrift_hive_metastore.rb b/metastore/src/gen/thrift/gen-rb/thrift_hive_metastore.rb index 9f59a8a..e6ef08a 100644 --- a/metastore/src/gen/thrift/gen-rb/thrift_hive_metastore.rb +++ b/metastore/src/gen/thrift/gen-rb/thrift_hive_metastore.rb @@ -431,6 +431,22 @@ module ThriftHiveMetastore return end + def alter_table_with_cascade(dbname, tbl_name, new_tbl, cascade) + send_alter_table_with_cascade(dbname, tbl_name, new_tbl, cascade) + recv_alter_table_with_cascade() + end + + def send_alter_table_with_cascade(dbname, tbl_name, new_tbl, cascade) + send_message('alter_table_with_cascade', Alter_table_with_cascade_args, :dbname => dbname, :tbl_name => tbl_name, :new_tbl => new_tbl, :cascade => cascade) + end + + def recv_alter_table_with_cascade() + result = receive_message(Alter_table_with_cascade_result) + raise result.o1 unless result.o1.nil? + raise result.o2 unless result.o2.nil? 
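The Ruby client above follows the same shape as the PHP and Python clients earlier in this patch: a Thrift method that declares exceptions gets a generated result struct whose fields (o1, o2) carry any raised exception, and the client re-raises whichever field comes back non-nil. A minimal, self-contained Java sketch of that dispatch pattern (the class and field names are stand-ins for the generated code, not part of this patch):

    // Sketch: how a generated Thrift client surfaces declared exceptions.
    class InvalidOperationException extends Exception {}
    class MetaException extends Exception {}

    // Stand-in for the generated *_result struct: at most one field is set.
    class AlterTableWithCascadeResult {
      InvalidOperationException o1;
      MetaException o2;
    }

    class RecvSketch {
      // Mirrors recv_alter_table_with_cascade: re-throw whichever declared
      // exception the server serialized; a void method returns nothing else.
      static void recv(AlterTableWithCascadeResult result)
          throws InvalidOperationException, MetaException {
        if (result.o1 != null) throw result.o1;
        if (result.o2 != null) throw result.o2;
      }
    }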
+ return + end + def add_partition(new_part) send_add_partition(new_part) return recv_add_partition() end @@ -2299,6 +2315,19 @@ module ThriftHiveMetastore write_result(result, oprot, 'alter_table_with_environment_context', seqid) end + def process_alter_table_with_cascade(seqid, iprot, oprot) + args = read_args(iprot, Alter_table_with_cascade_args) + result = Alter_table_with_cascade_result.new() + begin + @handler.alter_table_with_cascade(args.dbname, args.tbl_name, args.new_tbl, args.cascade) + rescue ::InvalidOperationException => o1 + result.o1 = o1 + rescue ::MetaException => o2 + result.o2 = o2 + end + write_result(result, oprot, 'alter_table_with_cascade', seqid) + end + def process_add_partition(seqid, iprot, oprot) args = read_args(iprot, Add_partition_args) result = Add_partition_result.new() @@ -4399,6 +4428,46 @@ module ThriftHiveMetastore ::Thrift::Struct.generate_accessors self end + class Alter_table_with_cascade_args + include ::Thrift::Struct, ::Thrift::Struct_Union + DBNAME = 1 + TBL_NAME = 2 + NEW_TBL = 3 + CASCADE = 4 + + FIELDS = { + DBNAME => {:type => ::Thrift::Types::STRING, :name => 'dbname'}, + TBL_NAME => {:type => ::Thrift::Types::STRING, :name => 'tbl_name'}, + NEW_TBL => {:type => ::Thrift::Types::STRUCT, :name => 'new_tbl', :class => ::Table}, + CASCADE => {:type => ::Thrift::Types::BOOL, :name => 'cascade'} + } + + def struct_fields; FIELDS; end + + def validate + end + + ::Thrift::Struct.generate_accessors self + end + + class Alter_table_with_cascade_result + include ::Thrift::Struct, ::Thrift::Struct_Union + O1 = 1 + O2 = 2 + + FIELDS = { + O1 => {:type => ::Thrift::Types::STRUCT, :name => 'o1', :class => ::InvalidOperationException}, + O2 => {:type => ::Thrift::Types::STRUCT, :name => 'o2', :class => ::MetaException} + } + + def struct_fields; FIELDS; end + + def validate + end + + ::Thrift::Struct.generate_accessors self + end + class Add_partition_args include ::Thrift::Struct, ::Thrift::Struct_Union NEW_PART = 1 diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/AlterHandler.java b/metastore/src/java/org/apache/hadoop/hive/metastore/AlterHandler.java index d872be5..b565304 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/AlterHandler.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/AlterHandler.java @@ -49,13 +49,38 @@ * @throws InvalidOperationException * thrown if the newTable object is invalid * @throws MetaException - * thrown if there is any other erro + * thrown if there is any other error */ public abstract void alterTable(RawStore msdb, Warehouse wh, String dbname, String name, Table newTable) throws InvalidOperationException, MetaException; /** + * Handles alter table; the changes can be cascaded to the table's partitions when applicable. + * + * @param msdb + * object to get metadata + * @param wh + * Hive Warehouse where table data is stored + * @param dbname + * database of the table being altered + * @param name + * original name of the table being altered. same as + * newTable.tableName if alter op is not a rename. 
+ * @param newTable + * new table object + * @param cascade + * whether the changes should be cascaded to the table's partitions, if applicable + * @throws InvalidOperationException + * thrown if the newTable object is invalid + * @throws MetaException + * thrown if there is any other error + */ + public abstract void alterTable(RawStore msdb, Warehouse wh, String dbname, + String name, Table newTable, boolean cascade) throws InvalidOperationException, + MetaException; + + /** * handles alter partition * * @param msdb diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java index fc6215a..d99cfdf 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java @@ -63,6 +63,11 @@ public void setConf(Configuration conf) { public void alterTable(RawStore msdb, Warehouse wh, String dbname, String name, Table newt) throws InvalidOperationException, MetaException { + alterTable(msdb, wh, dbname, name, newt, false); + } + + public void alterTable(RawStore msdb, Warehouse wh, String dbname, + String name, Table newt, boolean cascade) throws InvalidOperationException, MetaException { if (newt == null) { throw new InvalidOperationException("New table is invalid: " + newt); } @@ -118,6 +123,19 @@ public void alterTable(RawStore msdb, Warehouse wh, String dbname, oldt.getSd().getCols(), newt.getSd().getCols()); } + if (cascade) { + // Currently only column-related changes can be cascaded in alter table + if(MetaStoreUtils.isCascadeNeededInAlterTable(oldt, newt)) { + List parts = msdb.getPartitions(dbname, name, -1); + for (Partition part : parts) { + part.getSd().setCols(newt.getSd().getCols()); + msdb.alterPartition(dbname, name, part.getValues(), part); + } + } else { + LOG.warn("Alter table does not cascade changes to its partitions."); + } + } + //check that partition keys have not changed, except for virtual views //however, allow the partition comments to change boolean partKeysPartiallyEqual = checkPartialPartKeysEqual(oldt.getPartitionKeys(), diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java index 25b61a7..77a40fa 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java @@ -192,6 +192,7 @@ import org.apache.hadoop.hive.serde2.Deserializer; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge; import org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge.Server.ServerMode; import org.apache.hadoop.hive.thrift.TUGIContainingTransport; @@ -229,11 +230,15 @@ private static boolean isMetaStoreRemote = false; /** A fixed date format to be used for hive partition column values. */ - public static final DateFormat PARTITION_DATE_FORMAT; - static { - PARTITION_DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd"); - PARTITION_DATE_FORMAT.setLenient(false); // Without this, 2020-20-20 becomes 2021-08-20. - } + public static final ThreadLocal PARTITION_DATE_FORMAT = + new ThreadLocal() { + @Override + protected DateFormat initialValue() { + DateFormat val = new SimpleDateFormat("yyyy-MM-dd"); + val.setLenient(false); // Without this, 2020-20-20 becomes 2021-08-20. 
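The cascade branch added to HiveAlterHandler above is the core of the feature: when the caller asks for cascade and the column layout actually changed, the handler fetches every partition (-1 asks the RawStore for all of them) and rewrites each partition's storage descriptor with the new column list. That is one alterPartition metastore write per partition, so cost grows linearly with the partition count. A condensed restatement of that branch, using the same metastore calls shown in the hunk (types are Hive's; imports of the metastore classes omitted):

    // Condensed restatement of the cascade branch in HiveAlterHandler.alterTable.
    void cascadeColumns(RawStore msdb, String dbname, String name,
                        Table oldt, Table newt) throws Exception {
      if (MetaStoreUtils.isCascadeNeededInAlterTable(oldt, newt)) {
        List<Partition> parts = msdb.getPartitions(dbname, name, -1); // -1 = all
        for (Partition part : parts) {
          part.getSd().setCols(newt.getSd().getCols()); // push new schema down
          msdb.alterPartition(dbname, name, part.getValues(), part);
        }
      }
    }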
+ return val; + }; + }; /** * default port on which to start the Hive server @@ -326,7 +331,7 @@ private final void logAuditEvent(String cmd) { UserGroupInformation ugi; try { - ugi = ShimLoader.getHadoopShims().getUGIForConf(getConf()); + ugi = Utils.getUGIForConf(getConf()); } catch (Exception ex) { throw new RuntimeException(ex); } @@ -3257,7 +3262,15 @@ public void alter_table(final String dbname, final String name, final Table newTable) throws InvalidOperationException, MetaException { // Do not set an environment context. - alter_table_with_environment_context(dbname, name, newTable, null); + alter_table_core(dbname,name, newTable, null, false); + } + + @Override + public void alter_table_with_cascade(final String dbname, final String name, + final Table newTable, final boolean cascade) + throws InvalidOperationException, MetaException { + // Do not set an environment context. + alter_table_core(dbname,name, newTable, null, cascade); } @Override @@ -3265,6 +3278,12 @@ public void alter_table_with_environment_context(final String dbname, final String name, final Table newTable, final EnvironmentContext envContext) throws InvalidOperationException, MetaException { + alter_table_core(dbname, name, newTable, envContext, false); + } + + private void alter_table_core(final String dbname, final String name, final Table newTable, + final EnvironmentContext envContext, final boolean cascade) + throws InvalidOperationException, MetaException { startFunction("alter_table", ": db=" + dbname + " tbl=" + name + " newtbl=" + newTable.getTableName()); @@ -3279,7 +3298,7 @@ public void alter_table_with_environment_context(final String dbname, try { Table oldt = get_table_core(dbname, name); firePreEvent(new PreAlterTableEvent(oldt, newTable, this)); - alterHandler.alterTable(getMS(), wh, dbname, name, newTable); + alterHandler.alterTable(getMS(), wh, dbname, name, newTable, cascade); success = true; for (MetaStoreEventListener listener : listeners) { @@ -5826,7 +5845,8 @@ public static void startMetaStore(int port, HadoopThriftAuthBridge bridge, .processor(processor) .transportFactory(transFactory) .protocolFactory(new TBinaryProtocol.Factory()) - .inputProtocolFactory(new TBinaryProtocol.Factory(true, true, maxMessageSize)) + .inputProtocolFactory( + new TBinaryProtocol.Factory(true, true, maxMessageSize, maxMessageSize)) .minWorkerThreads(minWorkerThreads) .maxWorkerThreads(maxWorkerThreads); diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java index b331754..a9e1cbf 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java @@ -125,6 +125,7 @@ import org.apache.hadoop.hive.metastore.txn.TxnHandler; import org.apache.hadoop.hive.shims.HadoopShims; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.StringUtils; @@ -311,6 +312,12 @@ public void alter_table(String dbname, String tbl_name, Table new_tbl) alter_table(dbname, tbl_name, new_tbl, null); } + @Override + public void alter_table(String dbname, String tbl_name, Table new_tbl, boolean cascade) + throws InvalidOperationException, MetaException, TException { + client.alter_table_with_cascade(dbname, tbl_name, new_tbl, cascade); + } + 
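For API consumers, the overload added to HiveMetaStoreClient above turns a cascading column change into a single call. A hypothetical usage sketch (the database, table, and added column are invented for illustration; it assumes the table already exists and error handling is elided):

    import org.apache.hadoop.hive.conf.HiveConf;
    import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
    import org.apache.hadoop.hive.metastore.api.FieldSchema;
    import org.apache.hadoop.hive.metastore.api.Table;

    public class CascadeAlterExample {
      public static void main(String[] args) throws Exception {
        HiveMetaStoreClient msc = new HiveMetaStoreClient(new HiveConf());
        Table t = msc.getTable("default", "sales"); // hypothetical table
        t.getSd().getCols().add(new FieldSchema("region", "string", "example column"));
        // cascade=true: existing partitions pick up the new column list too
        msc.alter_table("default", "sales", t, true);
        msc.close();
      }
    }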
public void alter_table(String dbname, String tbl_name, Table new_tbl, EnvironmentContext envContext) throws InvalidOperationException, MetaException, TException { client.alter_table_with_environment_context(dbname, tbl_name, new_tbl, envContext); @@ -336,7 +343,6 @@ public void renamePartition(final String dbname, final String name, final List listPartitionNames(String dbName, String tblName, short max) throws MetaException, TException { - return filterHook.filterPartitionNames(dbName, tblName, + return filterHook.filterPartitionNames(dbName, tblName, client.get_partition_names(dbName, tblName, max)); } @@ -1210,7 +1216,7 @@ public boolean tableExists(String tableName) throws MetaException, public List listPartitionNames(String db_name, String tbl_name, List part_vals, short max_parts) throws MetaException, TException, NoSuchObjectException { - return filterHook.filterPartitionNames(db_name, tbl_name, + return filterHook.filterPartitionNames(db_name, tbl_name, client.get_partition_names_ps(db_name, tbl_name, part_vals, max_parts)); } diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java b/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java index 066ab68..11b0336 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java @@ -616,6 +616,10 @@ void createTable(Table tbl) throws AlreadyExistsException, void alter_table(String defaultDatabaseName, String tblName, Table table) throws InvalidOperationException, MetaException, TException; + //alter_table_with_cascade + void alter_table(String defaultDatabaseName, String tblName, Table table, + boolean cascade) throws InvalidOperationException, MetaException, TException; + void createDatabase(Database db) throws InvalidObjectException, AlreadyExistsException, MetaException, TException; diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java index 35f6f26..564ac8b 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java @@ -980,7 +980,7 @@ public void visit(LeafNode node) throws MetaException { // TODO: Filter.g cannot parse a quoted date; try to parse date here too. 
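The pushdown code below tries to reinterpret a quoted string literal as a partition-column date: if the literal parses as yyyy-MM-dd the comparison proceeds on java.sql.Date, otherwise the value stays a string and the type mismatch is flagged later (per the "handled below" comment in the source). A self-contained sketch of that coercion step (names are illustrative):

    import java.text.ParseException;
    import java.text.SimpleDateFormat;

    class DateCoercionSketch {
      static Object coerce(Object nodeValue, SimpleDateFormat fmt) {
        if (nodeValue instanceof String) {
          try {
            return new java.sql.Date(fmt.parse((String) nodeValue).getTime());
          } catch (ParseException pe) {
            // not a date literal; leave as-is and let later checks flag it
          }
        }
        return nodeValue;
      }

      public static void main(String[] args) {
        SimpleDateFormat fmt = new SimpleDateFormat("yyyy-MM-dd");
        fmt.setLenient(false);
        System.out.println(coerce("2014-11-05", fmt)); // becomes java.sql.Date
        System.out.println(coerce("not-a-date", fmt)); // stays a String
      }
    }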
try { nodeValue = new java.sql.Date( - HiveMetaStore.PARTITION_DATE_FORMAT.parse((String)nodeValue).getTime()); + HiveMetaStore.PARTITION_DATE_FORMAT.get().parse((String)nodeValue).getTime()); valType = FilterType.Date; } catch (ParseException pe) { // do nothing, handled below - types will mismatch } diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java index 1ac5aff..2db2658 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java @@ -554,6 +554,26 @@ static void throwExceptionIfIncompatibleColTypeChange( } } + static boolean isCascadeNeededInAlterTable(Table oldTable, Table newTable) { + List oldCols = oldTable.getSd().getCols(); + List newCols = newTable.getSd().getCols(); + + //currently cascade only supports add/replace columns and + //changing column type/position/name/comments + if (oldCols.size() != newCols.size()) { + return true; + } else { + for (int i = 0; i < oldCols.size(); i++) { + FieldSchema oldCol = oldCols.get(i); + FieldSchema newCol = newCols.get(i); + if(!oldCol.equals(newCol)) { + return true; + } + } + } + return false; + } + /** * @return true if oldType and newType are compatible. * Two types are compatible if we have internal functions to cast one to another. diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java b/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java index 2a42a17..49ddb2e 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java @@ -137,6 +137,7 @@ import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy; import org.apache.hadoop.util.StringUtils; +import org.apache.hive.common.util.HiveStringUtils; import org.apache.thrift.TException; import org.datanucleus.store.rdbms.exceptions.MissingTableException; @@ -466,12 +467,15 @@ public void rollbackTransaction() { if (currentTransaction.isActive() && transactionStatus != TXN_STATUS.ROLLBACK) { transactionStatus = TXN_STATUS.ROLLBACK; - // could already be rolled back - currentTransaction.rollback(); - // remove all detached objects from the cache, since the transaction is - // being rolled back they are no longer relevant, and this prevents them - // from reattaching in future transactions - pm.evictAll(); + try { + // could already be rolled back + currentTransaction.rollback(); + } finally { + // remove all detached objects from the cache, since the transaction is + // being rolled back they are no longer relevant, and this prevents them + // from reattaching in future transactions + pm.evictAll(); + } } } @@ -503,7 +507,7 @@ private MDatabase getMDatabase(String name) throws NoSuchObjectException { boolean commited = false; try { openTransaction(); - name = name.toLowerCase().trim(); + name = HiveStringUtils.normalizeIdentifier(name); Query query = pm.newQuery(MDatabase.class, "name == dbname"); query.declareParameters("java.lang.String dbname"); query.setUnique(true); @@ -615,7 +619,7 @@ public boolean alterDatabase(String dbName, Database db) public boolean dropDatabase(String dbname) throws NoSuchObjectException, MetaException { boolean success = false; LOG.info("Dropping database " + dbname + " along with all tables"); - dbname = dbname.toLowerCase(); + dbname = 
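The isCascadeNeededInAlterTable check above is a positional comparison of the old and new column lists: a different length means columns were added or replaced, and any element-wise mismatch means a rename, retype, move, or comment change. A worked example with plain strings standing in for FieldSchema (whose equals() compares name, type, and comment):

    import java.util.Arrays;
    import java.util.List;

    class CascadeCheckSketch {
      static boolean needsCascade(List<String> oldCols, List<String> newCols) {
        if (oldCols.size() != newCols.size()) return true; // add/replace columns
        for (int i = 0; i < oldCols.size(); i++) {
          if (!oldCols.get(i).equals(newCols.get(i))) return true; // changed column
        }
        return false;
      }

      public static void main(String[] args) {
        List<String> oldCols = Arrays.asList("id:int", "name:string");
        System.out.println(needsCascade(oldCols, Arrays.asList("id:int", "name:string")));    // false
        System.out.println(needsCascade(oldCols, Arrays.asList("id:bigint", "name:string"))); // true
      }
    }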
HiveStringUtils.normalizeIdentifier(dbname); try { openTransaction(); @@ -912,7 +916,7 @@ public Table getTable(String dbName, String tableName) throws MetaException { List tbls = null; try { openTransaction(); - dbName = dbName.toLowerCase().trim(); + dbName = HiveStringUtils.normalizeIdentifier(dbName); // Take the pattern and split it on the | to get all the composing // patterns String[] subpatterns = pattern.trim().split("\\|"); @@ -958,8 +962,8 @@ private MTable getMTable(String db, String table) { boolean commited = false; try { openTransaction(); - db = db.toLowerCase().trim(); - table = table.toLowerCase().trim(); + db = HiveStringUtils.normalizeIdentifier(db); + table = HiveStringUtils.normalizeIdentifier(table); Query query = pm.newQuery(MTable.class, "tableName == table && database.name == db"); query.declareParameters("java.lang.String table, java.lang.String db"); query.setUnique(true); @@ -982,7 +986,7 @@ private MTable getMTable(String db, String table) { try { openTransaction(); - db = db.toLowerCase().trim(); + db = HiveStringUtils.normalizeIdentifier(db); Query dbExistsQuery = pm.newQuery(MDatabase.class, "name == db"); dbExistsQuery.declareParameters("java.lang.String db"); dbExistsQuery.setUnique(true); @@ -994,7 +998,7 @@ private MTable getMTable(String db, String table) { List lowered_tbl_names = new ArrayList(); for (String t : tbl_names) { - lowered_tbl_names.add(t.toLowerCase().trim()); + lowered_tbl_names.add(HiveStringUtils.normalizeIdentifier(t)); } Query query = pm.newQuery(MTable.class); query.setFilter("database.name == db && tbl_names.contains(tableName)"); @@ -1019,7 +1023,8 @@ private MTable getMTable(String db, String table) { /** Makes shallow copy of a map to avoid DataNucleus mucking with our objects. */ private Map convertMap(Map dnMap) { - return (dnMap == null) ? null : Maps.newHashMap(dnMap); + // Must be deterministic order map - see HIVE-8707 + return (dnMap == null) ? 
null : Maps.newLinkedHashMap(dnMap); } private Table convertToTable(MTable mtbl) throws MetaException { @@ -1074,7 +1079,7 @@ private MTable convertToMTable(Table tbl) throws InvalidObjectException, } // A new table is always created with a new column descriptor - return new MTable(tbl.getTableName().toLowerCase(), mdb, + return new MTable(HiveStringUtils.normalizeIdentifier(tbl.getTableName()), mdb, convertToMStorageDescriptor(tbl.getSd()), tbl.getOwner(), tbl .getCreateTime(), tbl.getLastAccessTime(), tbl.getRetention(), convertToMFieldSchemas(tbl.getPartitionKeys()), tbl.getParameters(), @@ -1087,7 +1092,7 @@ private MTable convertToMTable(Table tbl) throws InvalidObjectException, if (keys != null) { mkeys = new ArrayList(keys.size()); for (FieldSchema part : keys) { - mkeys.add(new MFieldSchema(part.getName().toLowerCase(), + mkeys.add(new MFieldSchema(HiveStringUtils.normalizeIdentifier(part.getName()), part.getType(), part.getComment())); } } @@ -1111,7 +1116,7 @@ private MTable convertToMTable(Table tbl) throws InvalidObjectException, if (keys != null) { mkeys = new ArrayList(keys.size()); for (Order part : keys) { - mkeys.add(new MOrder(part.getCol().toLowerCase(), part.getOrder())); + mkeys.add(new MOrder(HiveStringUtils.normalizeIdentifier(part.getCol()), part.getOrder())); } } return mkeys; @@ -1486,8 +1491,8 @@ private MPartition getMPartition(String dbName, String tableName, boolean commited = false; try { openTransaction(); - dbName = dbName.toLowerCase().trim(); - tableName = tableName.toLowerCase().trim(); + dbName = HiveStringUtils.normalizeIdentifier(dbName); + tableName = HiveStringUtils.normalizeIdentifier(tableName); MTable mtbl = getMTable(dbName, tableName); if (mtbl == null) { commited = commitTransaction(); @@ -1820,8 +1825,8 @@ public Partition getPartitionWithAuth(String dbName, String tblName, private List getPartitionNamesNoTxn(String dbName, String tableName, short max) { List pns = new ArrayList(); - dbName = dbName.toLowerCase().trim(); - tableName = tableName.toLowerCase().trim(); + dbName = HiveStringUtils.normalizeIdentifier(dbName); + tableName = HiveStringUtils.normalizeIdentifier(tableName); Query q = pm.newQuery( "select partitionName from org.apache.hadoop.hive.metastore.model.MPartition " + "where table.database.name == t1 && table.tableName == t2 " @@ -1857,8 +1862,8 @@ public Partition getPartitionWithAuth(String dbName, String tblName, private Collection getPartitionPsQueryResults(String dbName, String tableName, List part_vals, short max_parts, String resultsCol) throws MetaException, NoSuchObjectException { - dbName = dbName.toLowerCase().trim(); - tableName = tableName.toLowerCase().trim(); + dbName = HiveStringUtils.normalizeIdentifier(dbName); + tableName = HiveStringUtils.normalizeIdentifier(tableName); Table table = getTable(dbName, tableName); if (table == null) { @@ -1967,8 +1972,8 @@ private Collection getPartitionPsQueryResults(String dbName, String tableName, try { openTransaction(); LOG.debug("Executing listMPartitions"); - dbName = dbName.toLowerCase().trim(); - tableName = tableName.toLowerCase().trim(); + dbName = HiveStringUtils.normalizeIdentifier(dbName); + tableName = HiveStringUtils.normalizeIdentifier(tableName); Query query = pm.newQuery(MPartition.class, "table.tableName == t1 && table.database.name == t2"); query.declareParameters("java.lang.String t1, java.lang.String t2"); @@ -2289,8 +2294,8 @@ private void dropPartitionsNoTxn(String dbName, String tblName, List par query.setFilter(sb.toString()); LOG.debug(" JDOQL 
filter is " + sb.toString()); - params.put("t1", tblName.trim().toLowerCase()); - params.put("t2", dbName.trim().toLowerCase()); + params.put("t1", HiveStringUtils.normalizeIdentifier(tblName)); + params.put("t2", HiveStringUtils.normalizeIdentifier(dbName)); query.declareParameters(makeParameterDeclarationString(params)); return new ObjectPair>(query, params); @@ -2316,9 +2321,9 @@ public GetHelper(String dbName, String tblName, boolean allowSql, boolean allowJ throws MetaException { assert allowSql || allowJdo; this.allowJdo = allowJdo; - this.dbName = dbName.toLowerCase(); + this.dbName = HiveStringUtils.normalizeIdentifier(dbName); if (tblName != null){ - this.tblName = tblName.toLowerCase(); + this.tblName = HiveStringUtils.normalizeIdentifier(tblName); } else { // tblName can be null in cases of Helper being used at a higher // abstraction level, such as with datbases @@ -2619,7 +2624,7 @@ private String makeParameterDeclarationStringObj(Map params) { try { openTransaction(); LOG.debug("Executing listTableNamesByFilter"); - dbName = dbName.toLowerCase().trim(); + dbName = HiveStringUtils.normalizeIdentifier(dbName); Map params = new HashMap(); String queryFilterString = makeQueryFilterString(dbName, null, filter, params); Query query = pm.newQuery(MTable.class); @@ -2664,8 +2669,8 @@ private String makeParameterDeclarationStringObj(Map params) { try { openTransaction(); LOG.debug("Executing listMPartitionNamesByFilter"); - dbName = dbName.toLowerCase(); - tableName = tableName.toLowerCase(); + dbName = HiveStringUtils.normalizeIdentifier(dbName); + tableName = HiveStringUtils.normalizeIdentifier(tableName); MTable mtable = getMTable(dbName, tableName); if( mtable == null ) { @@ -2716,8 +2721,8 @@ public void alterTable(String dbname, String name, Table newTable) boolean success = false; try { openTransaction(); - name = name.toLowerCase(); - dbname = dbname.toLowerCase(); + name = HiveStringUtils.normalizeIdentifier(name); + dbname = HiveStringUtils.normalizeIdentifier(dbname); MTable newt = convertToMTable(newTable); if (newt == null) { throw new InvalidObjectException("new table is invalid"); @@ -2730,7 +2735,7 @@ public void alterTable(String dbname, String name, Table newTable) // For now only alter name, owner, parameters, cols, bucketcols are allowed oldt.setDatabase(newt.getDatabase()); - oldt.setTableName(newt.getTableName().toLowerCase()); + oldt.setTableName(HiveStringUtils.normalizeIdentifier(newt.getTableName())); oldt.setParameters(newt.getParameters()); oldt.setOwner(newt.getOwner()); // Fully copy over the contents of the new SD into the old SD, @@ -2758,9 +2763,9 @@ public void alterIndex(String dbname, String baseTblName, String name, Index new boolean success = false; try { openTransaction(); - name = name.toLowerCase(); - baseTblName = baseTblName.toLowerCase(); - dbname = dbname.toLowerCase(); + name = HiveStringUtils.normalizeIdentifier(name); + baseTblName = HiveStringUtils.normalizeIdentifier(baseTblName); + dbname = HiveStringUtils.normalizeIdentifier(dbname); MIndex newi = convertToMIndex(newIndex); if (newi == null) { throw new InvalidObjectException("new index is invalid"); @@ -2785,8 +2790,8 @@ public void alterIndex(String dbname, String baseTblName, String name, Index new private void alterPartitionNoTxn(String dbname, String name, List part_vals, Partition newPart) throws InvalidObjectException, MetaException { - name = name.toLowerCase(); - dbname = dbname.toLowerCase(); + name = HiveStringUtils.normalizeIdentifier(name); + dbname = 
HiveStringUtils.normalizeIdentifier(dbname); MPartition oldp = getMPartition(dbname, name, part_vals); MPartition newp = convertToMPart(newPart, false); if (oldp == null || newp == null) { @@ -3019,7 +3024,7 @@ private MIndex convertToMIndex(Index index) throws InvalidObjectException, "Underlying index table does not exist for the given index."); } - return new MIndex(index.getIndexName().toLowerCase(), origTable, index.getCreateTime(), + return new MIndex(HiveStringUtils.normalizeIdentifier(index.getIndexName()), origTable, index.getCreateTime(), index.getLastAccessTime(), index.getParameters(), indexTable, msd, index.getIndexHandlerClass(), index.isDeferredRebuild()); } @@ -3048,8 +3053,8 @@ private MIndex getMIndex(String dbName, String originalTblName, String indexName boolean commited = false; try { openTransaction(); - dbName = dbName.toLowerCase().trim(); - originalTblName = originalTblName.toLowerCase().trim(); + dbName = HiveStringUtils.normalizeIdentifier(dbName); + originalTblName = HiveStringUtils.normalizeIdentifier(originalTblName); MTable mtbl = getMTable(dbName, originalTblName); if (mtbl == null) { commited = commitTransaction(); @@ -3060,7 +3065,8 @@ private MIndex getMIndex(String dbName, String originalTblName, String indexName "origTable.tableName == t1 && origTable.database.name == t2 && indexName == t3"); query.declareParameters("java.lang.String t1, java.lang.String t2, java.lang.String t3"); query.setUnique(true); - midx = (MIndex) query.execute(originalTblName, dbName, indexName.toLowerCase()); + midx = (MIndex) query.execute(originalTblName, dbName, + HiveStringUtils.normalizeIdentifier(indexName)); pm.retrieve(midx); commited = commitTransaction(); } finally { @@ -3134,8 +3140,8 @@ private Index convertToIndex(MIndex mIndex) throws MetaException { try { openTransaction(); LOG.debug("Executing listMIndexes"); - dbName = dbName.toLowerCase().trim(); - origTableName = origTableName.toLowerCase().trim(); + dbName = HiveStringUtils.normalizeIdentifier(dbName); + origTableName = HiveStringUtils.normalizeIdentifier(origTableName); Query query = pm.newQuery(MIndex.class, "origTable.tableName == t1 && origTable.database.name == t2"); query.declareParameters("java.lang.String t1, java.lang.String t2"); @@ -3160,8 +3166,8 @@ private Index convertToIndex(MIndex mIndex) throws MetaException { try { openTransaction(); LOG.debug("Executing listIndexNames"); - dbName = dbName.toLowerCase().trim(); - origTableName = origTableName.toLowerCase().trim(); + dbName = HiveStringUtils.normalizeIdentifier(dbName); + origTableName = HiveStringUtils.normalizeIdentifier(origTableName); Query q = pm.newQuery( "select indexName from org.apache.hadoop.hive.metastore.model.MIndex " + "where origTable.database.name == t1 && origTable.tableName == t2 " @@ -3582,7 +3588,7 @@ public PrincipalPrivilegeSet getUserPrivilegeSet(String userName, public List getDBPrivilege(String dbName, String principalName, PrincipalType principalType) throws InvalidObjectException, MetaException { - dbName = dbName.toLowerCase().trim(); + dbName = HiveStringUtils.normalizeIdentifier(dbName); if (principalName != null) { List userNameDbPriv = this.listPrincipalDBGrants( @@ -3608,7 +3614,7 @@ public PrincipalPrivilegeSet getDBPrivilegeSet(String dbName, String userName, List groupNames) throws InvalidObjectException, MetaException { boolean commited = false; - dbName = dbName.toLowerCase().trim(); + dbName = HiveStringUtils.normalizeIdentifier(dbName); PrincipalPrivilegeSet ret = new PrincipalPrivilegeSet(); try { @@ 
-3651,8 +3657,8 @@ public PrincipalPrivilegeSet getPartitionPrivilegeSet(String dbName, List groupNames) throws InvalidObjectException, MetaException { boolean commited = false; PrincipalPrivilegeSet ret = new PrincipalPrivilegeSet(); - tableName = tableName.toLowerCase().trim(); - dbName = dbName.toLowerCase().trim(); + tableName = HiveStringUtils.normalizeIdentifier(tableName); + dbName = HiveStringUtils.normalizeIdentifier(dbName); try { openTransaction(); @@ -3694,8 +3700,8 @@ public PrincipalPrivilegeSet getTablePrivilegeSet(String dbName, throws InvalidObjectException, MetaException { boolean commited = false; PrincipalPrivilegeSet ret = new PrincipalPrivilegeSet(); - tableName = tableName.toLowerCase().trim(); - dbName = dbName.toLowerCase().trim(); + tableName = HiveStringUtils.normalizeIdentifier(tableName); + dbName = HiveStringUtils.normalizeIdentifier(dbName); try { openTransaction(); @@ -3736,9 +3742,9 @@ public PrincipalPrivilegeSet getColumnPrivilegeSet(String dbName, String tableName, String partitionName, String columnName, String userName, List groupNames) throws InvalidObjectException, MetaException { - tableName = tableName.toLowerCase().trim(); - dbName = dbName.toLowerCase().trim(); - columnName = columnName.toLowerCase().trim(); + tableName = HiveStringUtils.normalizeIdentifier(tableName); + dbName = HiveStringUtils.normalizeIdentifier(dbName); + columnName = HiveStringUtils.normalizeIdentifier(columnName); boolean commited = false; PrincipalPrivilegeSet ret = new PrincipalPrivilegeSet(); @@ -3780,8 +3786,8 @@ public PrincipalPrivilegeSet getColumnPrivilegeSet(String dbName, String tableName, String partName, String principalName, PrincipalType principalType) { - tableName = tableName.toLowerCase().trim(); - dbName = dbName.toLowerCase().trim(); + tableName = HiveStringUtils.normalizeIdentifier(tableName); + dbName = HiveStringUtils.normalizeIdentifier(dbName); if (principalName != null) { List userNameTabPartPriv = this @@ -3809,8 +3815,8 @@ private PrincipalType getPrincipalTypeFromStr(String str) { private List getTablePrivilege(String dbName, String tableName, String principalName, PrincipalType principalType) { - tableName = tableName.toLowerCase().trim(); - dbName = dbName.toLowerCase().trim(); + tableName = HiveStringUtils.normalizeIdentifier(tableName); + dbName = HiveStringUtils.normalizeIdentifier(dbName); if (principalName != null) { List userNameTabPartPriv = this @@ -3835,9 +3841,9 @@ private PrincipalType getPrincipalTypeFromStr(String str) { String tableName, String columnName, String partitionName, String principalName, PrincipalType principalType) { - tableName = tableName.toLowerCase().trim(); - dbName = dbName.toLowerCase().trim(); - columnName = columnName.toLowerCase().trim(); + tableName = HiveStringUtils.normalizeIdentifier(tableName); + dbName = HiveStringUtils.normalizeIdentifier(dbName); + columnName = HiveStringUtils.normalizeIdentifier(columnName); if (partitionName == null) { List userNameColumnPriv = this @@ -4421,7 +4427,7 @@ public boolean revokePrivileges(PrivilegeBag privileges, boolean grantOption) PrincipalType principalType, String dbName) { boolean success = false; List mSecurityDBList = null; - dbName = dbName.toLowerCase().trim(); + dbName = HiveStringUtils.normalizeIdentifier(dbName); try { openTransaction(); @@ -4505,11 +4511,11 @@ public boolean revokePrivileges(PrivilegeBag privileges, boolean grantOption) public List listAllTableGrants(String dbName, String tableName) { boolean success = false; - tableName = 
tableName.toLowerCase().trim(); - dbName = dbName.toLowerCase().trim(); + tableName = HiveStringUtils.normalizeIdentifier(tableName); + dbName = HiveStringUtils.normalizeIdentifier(dbName); List mSecurityTabList = null; - tableName = tableName.toLowerCase().trim(); - dbName = dbName.toLowerCase().trim(); + tableName = HiveStringUtils.normalizeIdentifier(tableName); + dbName = HiveStringUtils.normalizeIdentifier(dbName); try { openTransaction(); LOG.debug("Executing listAllTableGrants"); @@ -4536,8 +4542,8 @@ public boolean revokePrivileges(PrivilegeBag privileges, boolean grantOption) @SuppressWarnings("unchecked") public List listTableAllPartitionGrants(String dbName, String tableName) { - tableName = tableName.toLowerCase().trim(); - dbName = dbName.toLowerCase().trim(); + tableName = HiveStringUtils.normalizeIdentifier(tableName); + dbName = HiveStringUtils.normalizeIdentifier(dbName); boolean success = false; List mSecurityTabPartList = null; try { @@ -4568,8 +4574,8 @@ public boolean revokePrivileges(PrivilegeBag privileges, boolean grantOption) String tableName) { boolean success = false; List mTblColPrivilegeList = null; - tableName = tableName.toLowerCase().trim(); - dbName = dbName.toLowerCase().trim(); + tableName = HiveStringUtils.normalizeIdentifier(tableName); + dbName = HiveStringUtils.normalizeIdentifier(dbName); try { openTransaction(); @@ -4595,8 +4601,8 @@ public boolean revokePrivileges(PrivilegeBag privileges, boolean grantOption) public List listTableAllPartitionColumnGrants(String dbName, String tableName) { boolean success = false; - tableName = tableName.toLowerCase().trim(); - dbName = dbName.toLowerCase().trim(); + tableName = HiveStringUtils.normalizeIdentifier(tableName); + dbName = HiveStringUtils.normalizeIdentifier(dbName); List mSecurityColList = null; try { @@ -4623,8 +4629,8 @@ public boolean revokePrivileges(PrivilegeBag privileges, boolean grantOption) public List listPartitionAllColumnGrants(String dbName, String tableName, List partNames) { boolean success = false; - tableName = tableName.toLowerCase().trim(); - dbName = dbName.toLowerCase().trim(); + tableName = HiveStringUtils.normalizeIdentifier(tableName); + dbName = HiveStringUtils.normalizeIdentifier(dbName); List mSecurityColList = null; try { @@ -4655,7 +4661,7 @@ public void dropPartitionAllColumnGrantsNoTxn( @SuppressWarnings("unchecked") private List listDatabaseGrants(String dbName) { - dbName = dbName.toLowerCase().trim(); + dbName = HiveStringUtils.normalizeIdentifier(dbName); boolean success = false; try { @@ -4681,8 +4687,8 @@ public void dropPartitionAllColumnGrantsNoTxn( @SuppressWarnings("unchecked") private List listPartitionGrants(String dbName, String tableName, List partNames) { - tableName = tableName.toLowerCase().trim(); - dbName = dbName.toLowerCase().trim(); + tableName = HiveStringUtils.normalizeIdentifier(tableName); + dbName = HiveStringUtils.normalizeIdentifier(dbName); boolean success = false; List mSecurityTabPartList = null; @@ -4725,8 +4731,8 @@ private void dropPartitionGrantsNoTxn(String dbName, String tableName, List listAllTableGrants( String principalName, PrincipalType principalType, String dbName, String tableName) { - tableName = tableName.toLowerCase().trim(); - dbName = dbName.toLowerCase().trim(); + tableName = HiveStringUtils.normalizeIdentifier(tableName); + dbName = HiveStringUtils.normalizeIdentifier(dbName); boolean success = false; List mSecurityTabPartList = null; @@ -4779,8 +4785,8 @@ private void dropPartitionGrantsNoTxn(String dbName, String 
tableName, List mSecurityTabPartList = null; try { @@ -4814,9 +4820,9 @@ private void dropPartitionGrantsNoTxn(String dbName, String tableName, List mSecurityColList = null; try { openTransaction(); @@ -4848,9 +4854,9 @@ private void dropPartitionGrantsNoTxn(String dbName, String tableName, List mSecurityColList = null; try { @@ -6001,7 +6007,8 @@ public ColumnStatistics getTableColumnStatistics(String dbName, String tableName protected ColumnStatistics getTableColumnStatisticsInternal( String dbName, String tableName, final List colNames, boolean allowSql, boolean allowJdo) throws MetaException, NoSuchObjectException { - return new GetStatHelper(dbName.toLowerCase(), tableName.toLowerCase(), allowSql, allowJdo) { + return new GetStatHelper(HiveStringUtils.normalizeIdentifier(dbName), + HiveStringUtils.normalizeIdentifier(tableName), allowSql, allowJdo) { @Override protected ColumnStatistics getSqlResult(GetHelper ctx) throws MetaException { return directSql.getTableStats(dbName, tblName, colNames); @@ -6218,7 +6225,9 @@ public boolean deletePartitionColumnStatistics(String dbName, String tableName, if (colName != null) { query.setUnique(true); mStatsObj = (MPartitionColumnStatistics)query.executeWithArray(partName.trim(), - dbName.trim(), tableName.trim(), colName.trim()); + HiveStringUtils.normalizeIdentifier(dbName), + HiveStringUtils.normalizeIdentifier(tableName), + HiveStringUtils.normalizeIdentifier(colName)); pm.retrieve(mStatsObj); if (mStatsObj != null) { @@ -6229,7 +6238,8 @@ public boolean deletePartitionColumnStatistics(String dbName, String tableName, } } else { mStatsObjColl= (List)query.execute(partName.trim(), - dbName.trim(), tableName.trim()); + HiveStringUtils.normalizeIdentifier(dbName), + HiveStringUtils.normalizeIdentifier(tableName)); pm.retrieveAll(mStatsObjColl); if (mStatsObjColl != null) { @@ -6295,8 +6305,10 @@ public boolean deleteTableColumnStatistics(String dbName, String tableName, Stri if (colName != null) { query.setUnique(true); - mStatsObj = (MTableColumnStatistics)query.execute(tableName.trim(), - dbName.trim(), colName.trim()); + mStatsObj = (MTableColumnStatistics)query.execute( + HiveStringUtils.normalizeIdentifier(tableName), + HiveStringUtils.normalizeIdentifier(dbName), + HiveStringUtils.normalizeIdentifier(colName)); pm.retrieve(mStatsObj); if (mStatsObj != null) { @@ -6306,7 +6318,9 @@ public boolean deleteTableColumnStatistics(String dbName, String tableName, Stri + tableName + " col=" + colName); } } else { - mStatsObjColl= (List)query.execute(tableName.trim(), dbName.trim()); + mStatsObjColl= (List)query.execute( + HiveStringUtils.normalizeIdentifier(tableName), + HiveStringUtils.normalizeIdentifier(dbName)); pm.retrieveAll(mStatsObjColl); if (mStatsObjColl != null) { @@ -6685,8 +6699,8 @@ public boolean doesPartitionExist(String dbName, String tableName, List boolean success = false; try { openTransaction(); - dbName = dbName.toLowerCase().trim(); - tableName = tableName.toLowerCase().trim(); + dbName = HiveStringUtils.normalizeIdentifier(dbName); + tableName = HiveStringUtils.normalizeIdentifier(tableName); // TODO: this could also be passed from upper layer; or this method should filter the list. 
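The replacements running through ObjectStore above and below swap ad-hoc toLowerCase().trim() calls for HiveStringUtils.normalizeIdentifier, centralizing how database, table, column, index, and function names are canonicalized before JDO queries. A presumed shape of the helper, inferred from the calls it replaces (an assumption; the real implementation may also handle locale or quoting details not visible in this diff):

    class IdentifierSketch {
      // Inferred behavior: trim whitespace, then lowercase the identifier.
      static String normalizeIdentifier(String identifier) {
        return identifier == null ? null : identifier.trim().toLowerCase();
      }

      public static void main(String[] args) {
        System.out.println(normalizeIdentifier("  Default_DB ")); // "default_db"
      }
    }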
MTable mtbl = getMTable(dbName, tableName); @@ -6818,8 +6832,8 @@ public void alterFunction(String dbName, String funcName, Function newFunction) boolean success = false; try { openTransaction(); - funcName = funcName.toLowerCase(); - dbName = dbName.toLowerCase(); + funcName = HiveStringUtils.normalizeIdentifier(funcName); + dbName = HiveStringUtils.normalizeIdentifier(dbName); MFunction newf = convertToMFunction(newFunction); if (newf == null) { throw new InvalidObjectException("new function is invalid"); @@ -6831,7 +6845,7 @@ public void alterFunction(String dbName, String funcName, Function newFunction) } // For now only alter name, owner, class name, type - oldf.setFunctionName(newf.getFunctionName().toLowerCase()); + oldf.setFunctionName(HiveStringUtils.normalizeIdentifier(newf.getFunctionName())); oldf.setDatabase(newf.getDatabase()); oldf.setOwnerName(newf.getOwnerName()); oldf.setOwnerType(newf.getOwnerType()); @@ -6872,8 +6886,8 @@ private MFunction getMFunction(String db, String function) { boolean commited = false; try { openTransaction(); - db = db.toLowerCase().trim(); - function = function.toLowerCase().trim(); + db = HiveStringUtils.normalizeIdentifier(db); + function = HiveStringUtils.normalizeIdentifier(function); Query query = pm.newQuery(MFunction.class, "functionName == function && database.name == db"); query.declareParameters("java.lang.String function, java.lang.String db"); query.setUnique(true); @@ -6911,7 +6925,7 @@ public Function getFunction(String dbName, String funcName) throws MetaException List funcs = null; try { openTransaction(); - dbName = dbName.toLowerCase().trim(); + dbName = HiveStringUtils.normalizeIdentifier(dbName); // Take the pattern and split it on the | to get all the composing // patterns String[] subpatterns = pattern.trim().split("\\|"); diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/RetryingMetaStoreClient.java b/metastore/src/java/org/apache/hadoop/hive/metastore/RetryingMetaStoreClient.java index 0e1fafc..4c9299c 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/RetryingMetaStoreClient.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/RetryingMetaStoreClient.java @@ -31,6 +31,7 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.security.UserGroupInformation; import org.apache.thrift.TApplicationException; import org.apache.thrift.TException; import org.apache.thrift.protocol.TProtocolException; @@ -122,11 +123,16 @@ public Object invoke(Object proxy, Method method, Object[] args) throws Throwabl * @throws MetaException */ private void reloginExpiringKeytabUser() throws MetaException { - if(!ShimLoader.getHadoopShims().isSecurityEnabled()){ + if(!UserGroupInformation.isSecurityEnabled()){ return; } try { - ShimLoader.getHadoopShims().reLoginUserFromKeytab(); + UserGroupInformation ugi = UserGroupInformation.getLoginUser(); + //checkTGT calls ugi.relogin only after checking if it is close to tgt expiry + //hadoop relogin is actually done only every x minutes (x=10 in hadoop 1.x) + if(ugi.isFromKeytab()){ + ugi.checkTGTAndReloginFromKeytab(); + } } catch (IOException e) { String msg = "Error doing relogin using keytab " + e.getMessage(); LOG.error(msg, e); diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/TUGIBasedProcessor.java b/metastore/src/java/org/apache/hadoop/hive/metastore/TUGIBasedProcessor.java index ef1eee2..ec8d608 100644 --- 
a/metastore/src/java/org/apache/hadoop/hive/metastore/TUGIBasedProcessor.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/TUGIBasedProcessor.java @@ -25,11 +25,12 @@ import java.util.List; import java.util.Map; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore.Iface; import org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore.set_ugi_args; import org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore.set_ugi_result; -import org.apache.hadoop.hive.shims.HadoopShims; -import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.hive.thrift.TUGIContainingTransport; import org.apache.hadoop.security.UserGroupInformation; import org.apache.thrift.ProcessFunction; @@ -56,7 +57,7 @@ private final I iface; private final Map> functions; - private final HadoopShims shim; + static final Log LOG = LogFactory.getLog(TUGIBasedProcessor.class); public TUGIBasedProcessor(I iface) throws SecurityException, NoSuchFieldException, IllegalArgumentException, IllegalAccessException, NoSuchMethodException, @@ -64,7 +65,6 @@ public TUGIBasedProcessor(I iface) throws SecurityException, NoSuchFieldExceptio super(iface); this.iface = iface; this.functions = getProcessMapView(); - shim = ShimLoader.getHadoopShims(); } @SuppressWarnings("unchecked") @@ -115,7 +115,7 @@ public Void run() { } }; try { - shim.doAs(clientUgi, pvea); + clientUgi.doAs(pvea); return true; } catch (RuntimeException rte) { if (rte.getCause() instanceof TException) { @@ -127,7 +127,11 @@ public Void run() { } catch (IOException ioe) { throw new RuntimeException(ioe); // unexpected! } finally { - shim.closeAllForUGI(clientUgi); + try { + FileSystem.closeAllForUGI(clientUgi); + } catch (IOException e) { + LOG.error("Could not clean up file-system handles for UGI: " + clientUgi, e); + } } } } @@ -160,8 +164,7 @@ private void handleSetUGI(TUGIContainingTransport ugiTrans, set_ugi_result result = fn.getResult(iface, args); List principals = result.getSuccess(); // Store the ugi in transport and then continue as usual. 
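The process flow above reduces to a doAs-then-cleanup pattern now expressed directly against Hadoop's UserGroupInformation instead of the shims. A self-contained sketch, with the action body elided and the method name invented for illustration:

import java.io.IOException;
import java.security.PrivilegedExceptionAction;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.security.UserGroupInformation;

public class DoAsCleanupSketch {
  // Run the request as the remote client's identity, then close the
  // FileSystem handles Hadoop cached for that identity so they cannot leak.
  static void runAsClient(UserGroupInformation clientUgi) throws Exception {
    try {
      clientUgi.doAs(new PrivilegedExceptionAction<Void>() {
        @Override
        public Void run() throws IOException {
          // ... dispatch the Thrift call on behalf of the client ...
          return null;
        }
      });
    } finally {
      FileSystem.closeAllForUGI(clientUgi);
    }
  }
}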
- ugiTrans.setClientUGI(shim.createRemoteUser(principals.remove(principals.size()-1), - principals)); + ugiTrans.setClientUGI(UserGroupInformation.createRemoteUser(principals.remove(principals.size()-1))); oprot.writeMessageBegin(new TMessage(msg.name, TMessageType.REPLY, msg.seqid)); result.write(oprot); oprot.writeMessageEnd(); diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java b/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java index c99ce5f..f824da3 100755 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java @@ -55,6 +55,7 @@ import org.apache.hadoop.hive.metastore.api.StorageDescriptor; import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.ReflectionUtils; @@ -261,11 +262,11 @@ public boolean isWritable(Path path) throws IOException { } final UserGroupInformation ugi; try { - ugi = ShimLoader.getHadoopShims().getUGIForConf(conf); + ugi = Utils.getUGIForConf(conf); } catch (LoginException le) { throw new IOException(le); } - String user = ShimLoader.getHadoopShims().getShortUserName(ugi); + String user = ugi.getShortUserName(); //check whether owner can delete if (stat.getOwner().equals(user) && stat.getPermission().getUserAction().implies(FsAction.WRITE)) { diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/parser/ExpressionTree.java b/metastore/src/java/org/apache/hadoop/hive/metastore/parser/ExpressionTree.java index 60b041b..781ac63 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/parser/ExpressionTree.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/parser/ExpressionTree.java @@ -446,7 +446,7 @@ private String getJdoFilterPushdownParam(Table table, int partColIndex, // columns have been excluded above, so it will either compare w/string or fail. Object val = value; if (value instanceof Date) { - val = HiveMetaStore.PARTITION_DATE_FORMAT.format((Date)value); + val = HiveMetaStore.PARTITION_DATE_FORMAT.get().format((Date)value); } boolean isStringValue = val instanceof String; if (!isStringValue && (!isIntegralSupported || !(val instanceof Long))) { diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/parser/Filter.g b/metastore/src/java/org/apache/hadoop/hive/metastore/parser/Filter.g index 7d62a32..8aef5bf 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/parser/Filter.g +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/parser/Filter.g @@ -46,11 +46,15 @@ import java.util.regex.Pattern; public String errorMsg; private static final Pattern datePattern = Pattern.compile(".*(\\d\\d\\d\\d-\\d\\d-\\d\\d).*"); - private static final SimpleDateFormat dateFormat; - static { - dateFormat = new SimpleDateFormat("yyyy-MM-dd"); - dateFormat.setLenient(false); - } + private static final ThreadLocal dateFormat = + new ThreadLocal() { + @Override + protected SimpleDateFormat initialValue() { + SimpleDateFormat val = new SimpleDateFormat("yyyy-MM-dd"); + val.setLenient(false); // Without this, 2020-20-20 becomes 2021-08-20. 
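// Illustrative aside, not part of the patch: SimpleDateFormat keeps mutable
// parse state, so one shared static instance breaks when several metastore
// threads parse partition filters concurrently; a per-thread copy avoids both
// the corruption and any locking. On Java 8+ the same ThreadLocal can be
// written more compactly, assuming a plain helper field:
//   private static final ThreadLocal<SimpleDateFormat> FMT =
//       ThreadLocal.withInitial(() -> new SimpleDateFormat("yyyy-MM-dd"));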
+ return val; + } + }; public static java.sql.Date ExtractDate (String input) { Matcher m = datePattern.matcher(input); @@ -58,7 +62,7 @@ import java.util.regex.Pattern; return null; } try { - return new java.sql.Date(dateFormat.parse(m.group(1)).getTime()); + return new java.sql.Date(dateFormat.get().parse(m.group(1)).getTime()); } catch (ParseException pe) { return null; } diff --git a/pom.xml b/pom.xml index 900dddb..a63413d 100644 --- a/pom.xml +++ b/pom.xml @@ -100,6 +100,7 @@ 3.4 1.7.5 0.8.0.RELEASE + 0.9.2-incubating 3.2.6 3.2.10 3.2.9 @@ -114,10 +115,9 @@ 1.1.3 1.5.4 1.4 - 10.10.1.1 + 10.11.1.1 14.0.1 2.1.6 - 0.20.2 1.2.1 2.5.0 ${basedir}/${hive.path.to.root}/testutils/hadoop @@ -132,15 +132,15 @@ 3.0.1 7.6.0.v20120127 1.14 - 0.9.94 + 2.12 1.11 1.1 3.5.2 20090211 4.11 2.22 - 0.9.0 - 0.9.0 + 0.9.2 + 0.9.2 1.2.16 2.3 1.9.5 @@ -150,7 +150,7 @@ requires netty < 3.6.0 we force hadoops version --> 3.7.0.Final - 1.5.0 + 1.6.0rc3 0.12.0 2.5.0 1.0.1 @@ -791,6 +791,28 @@ org.apache.maven.plugins + maven-enforcer-plugin + + + enforce-no-snapshots + + enforce + + + + + Release builds are not allowed to have SNAPSHOT dependencies + true + true + + + true + + + + + + org.apache.maven.plugins maven-surefire-plugin diff --git a/ql/pom.xml b/ql/pom.xml index 324bfeb..de62cac 100644 --- a/ql/pom.xml +++ b/ql/pom.xml @@ -28,7 +28,6 @@ Hive Query Language - 0.9.2-incubating .. @@ -471,6 +470,12 @@ org.apache.hadoop + hadoop-archives + ${hadoop-23.version} + true + + + org.apache.hadoop hadoop-mapreduce-client-core ${hadoop-23.version} true diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Driver.java b/ql/src/java/org/apache/hadoop/hive/ql/Driver.java index 70470c8..3fcf120 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/Driver.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/Driver.java @@ -115,6 +115,7 @@ import org.apache.hadoop.hive.ql.session.SessionState.LogHelper; import org.apache.hadoop.hive.serde2.ByteStream; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.mapred.ClusterStatus; import org.apache.hadoop.mapred.JobClient; import org.apache.hadoop.mapred.JobConf; @@ -520,8 +521,8 @@ private String getExplainOutput(BaseSemanticAnalyzer sem, QueryPlan plan, ByteArrayOutputStream baos = new ByteArrayOutputStream(); PrintStream ps = new PrintStream(baos); try { - task.getJSONPlan(ps, astStringTree, sem.getRootTasks(), sem.getFetchTask(), - false, true, true); + List> rootTasks = sem.getRootTasks(); + task.getJSONPlan(ps, astStringTree, rootTasks, sem.getFetchTask(), false, true, true); ret = baos.toString(); } catch (Exception e) { LOG.warn("Exception generating explain output: " + e, e); @@ -1348,7 +1349,7 @@ public int execute() throws CommandNeedRetryException { perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.PRE_HOOK + peh.getClass().getName()); ((PreExecute) peh).run(SessionState.get(), plan.getInputs(), plan.getOutputs(), - ShimLoader.getHadoopShims().getUGIForConf(conf)); + Utils.getUGIForConf(conf)); perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.PRE_HOOK + peh.getClass().getName()); } @@ -1518,7 +1519,7 @@ public int execute() throws CommandNeedRetryException { ((PostExecute) peh).run(SessionState.get(), plan.getInputs(), plan.getOutputs(), (SessionState.get() != null ?
SessionState.get().getLineageState().getLineageInfo() - : null), ShimLoader.getHadoopShims().getUGIForConf(conf)); + : null), Utils.getUGIForConf(conf)); perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.POST_HOOK + peh.getClass().getName()); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java index 292c83c..89775cc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java @@ -423,6 +423,9 @@ "sorted, table {0}", true), ALTER_TABLE_TYPE_PARTIAL_PARTITION_SPEC_NO_SUPPORTED(10299, "Alter table partition type {0} does not allow partial partition spec", true), + ALTER_TABLE_PARTITION_CASCADE_NOT_SUPPORTED(10300, + "Alter table partition type {0} does not support cascade", true), + //========================== 20000 range starts here ========================// SCRIPT_INIT_ERROR(20000, "Unable to initialize custom script."), diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ArchiveUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ArchiveUtils.java index f834ad5..54b61a9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ArchiveUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ArchiveUtils.java @@ -164,15 +164,17 @@ public HarPathHelper(HiveConf hconf, URI archive, URI originalBase) throws HiveE } } - public URI getHarUri(URI original, HadoopShims shim) throws HiveException { - URI harUri = null; - try { - harUri = shim.getHarUri(original, base, originalBase); - } catch (URISyntaxException e) { - throw new HiveException("Couldn't create har URI for location", e); + public URI getHarUri(URI original) throws URISyntaxException { + URI relative = originalBase.relativize(original); + if (relative.isAbsolute()) { + throw new URISyntaxException("Couldn't create URI for location.", + "Relative: " + relative + " Base: " + + base + " OriginalBase: " + originalBase); } - return harUri; + return base.resolve(relative); + + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java index e54b80e..1d0ed51 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java @@ -172,6 +172,7 @@ import org.apache.hadoop.hive.shims.HadoopShims; import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.tools.HadoopArchives; import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.ToolRunner; import org.apache.hive.common.util.AnnotationUtils; @@ -1297,7 +1298,6 @@ private int archive(Hive db, AlterTableSimpleDesc simpleDesc, // ARCHIVE_INTERMEDIATE_DIR_SUFFIX that's the same level as the partition, // if it does not already exist. If it does exist, we assume the dir is good // to use as the move operation that created it is atomic. 
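The rewritten getHarUri above is plain java.net.URI arithmetic. A small runnable example of the relativize-then-resolve step, with made-up paths (the real base and originalBase come from HarPathHelper):

import java.net.URI;

public class HarUriSketch {
  public static void main(String[] args) {
    URI originalBase = URI.create("hdfs://nn:8020/warehouse/tbl/");
    URI base = URI.create("har:/warehouse/tbl/data.har/");
    URI original = URI.create("hdfs://nn:8020/warehouse/tbl/ds=1/");

    // relativize() strips the common prefix, leaving "ds=1/".
    URI relative = originalBase.relativize(original);
    // If the result is still absolute, 'original' was not under
    // 'originalBase', the exact error case the new code reports.
    if (relative.isAbsolute()) {
      throw new IllegalStateException("Not under base: " + relative);
    }
    // resolve() grafts the partition path onto the archive root;
    // prints har:/warehouse/tbl/data.har/ds=1/
    System.out.println(base.resolve(relative));
  }
}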
- HadoopShims shim = ShimLoader.getHadoopShims(); if (!pathExists(intermediateArchivedDir) && !pathExists(intermediateOriginalDir)) { @@ -1319,7 +1319,16 @@ private int archive(Hive db, AlterTableSimpleDesc simpleDesc, tbl.getTableName(), partSpecInfo.getName()); jobname = Utilities.abbreviate(jobname, maxJobNameLen - 6); conf.setVar(HiveConf.ConfVars.HADOOPJOBNAME, jobname); - ret = shim.createHadoopArchive(conf, originalDir, tmpPath, archiveName); + HadoopArchives har = new HadoopArchives(conf); + List args = new ArrayList(); + + args.add("-archiveName"); + args.add(archiveName); + args.add("-p"); + args.add(originalDir.toString()); + args.add(tmpPath.toString()); + + ret = ToolRunner.run(har, args.toArray(new String[0])); } catch (Exception e) { throw new HiveException(e); } @@ -1380,8 +1389,7 @@ private int archive(Hive db, AlterTableSimpleDesc simpleDesc, try { for(Partition p: partitions) { URI originalPartitionUri = ArchiveUtils.addSlash(p.getDataLocation().toUri()); - URI test = p.getDataLocation().toUri(); - URI harPartitionDir = harHelper.getHarUri(originalPartitionUri, shim); + URI harPartitionDir = harHelper.getHarUri(originalPartitionUri); StringBuilder authority = new StringBuilder(); if(harPartitionDir.getUserInfo() != null) { authority.append(harPartitionDir.getUserInfo()).append("@"); @@ -1414,7 +1422,7 @@ private int archive(Hive db, AlterTableSimpleDesc simpleDesc, } private int unarchive(Hive db, AlterTableSimpleDesc simpleDesc) - throws HiveException { + throws HiveException, URISyntaxException { Table tbl = db.getTable(simpleDesc.getTableName()); @@ -1489,8 +1497,7 @@ private int unarchive(Hive db, AlterTableSimpleDesc simpleDesc) URI archiveUri = archivePath.toUri(); ArchiveUtils.HarPathHelper harHelper = new ArchiveUtils.HarPathHelper(conf, archiveUri, originalUri); - HadoopShims shim = ShimLoader.getHadoopShims(); - URI sourceUri = harHelper.getHarUri(originalUri, shim); + URI sourceUri = harHelper.getHarUri(originalUri); Path sourceDir = new Path(sourceUri.getScheme(), sourceUri.getAuthority(), sourceUri.getPath()); if(!pathExists(intermediateArchivedDir) && !pathExists(archivePath)) { @@ -3281,7 +3288,7 @@ private int alterTable(Hive db, AlterTableDesc alterTbl) throws HiveException { List allPartitions = null; if (alterTbl.getPartSpec() != null) { - Map partSpec = alterTbl.getPartSpec(); + Map partSpec = alterTbl.getPartSpec(); if (DDLSemanticAnalyzer.isFullSpec(tbl, partSpec)) { allPartitions = new ArrayList(); Partition part = db.getPartition(tbl, partSpec, false); @@ -3321,7 +3328,7 @@ private int alterTable(Hive db, AlterTableDesc alterTbl) throws HiveException { try { if (allPartitions == null) { - db.alterTable(alterTbl.getOldName(), tbl); + db.alterTable(alterTbl.getOldName(), tbl, alterTbl.getIsCascade()); } else { db.alterPartitions(tbl.getTableName(), allPartitions); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java index 4fb30bc..4f3d504 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java @@ -44,7 +44,6 @@ import org.apache.hadoop.hive.ql.io.HiveKey; import org.apache.hadoop.hive.ql.io.HiveOutputFormat; import org.apache.hadoop.hive.ql.io.HivePartitioner; -import org.apache.hadoop.hive.ql.io.HivePassThroughOutputFormat; import org.apache.hadoop.hive.ql.io.RecordUpdater; import org.apache.hadoop.hive.ql.io.StatsProvidingRecordWriter; import 
org.apache.hadoop.hive.ql.metadata.HiveException; @@ -329,7 +328,7 @@ protected void initializeOp(Configuration hconf) throws HiveException { taskId = Utilities.getTaskId(hconf); initializeSpecPath(); fs = specPath.getFileSystem(hconf); - hiveOutputFormat = conf.getTableInfo().getOutputFileFormatClass().newInstance(); + hiveOutputFormat = HiveFileFormatUtils.getHiveOutputFormat(hconf, conf.getTableInfo()); isCompressed = conf.getCompressed(); parent = Utilities.toTempPath(conf.getDirName()); statsCollectRawDataSize = conf.isStatsCollectRawDataSize(); @@ -338,6 +337,11 @@ protected void initializeOp(Configuration hconf) throws HiveException { serializer.initialize(null, conf.getTableInfo().getProperties()); outputClass = serializer.getSerializedClass(); + if (isLogInfoEnabled) { + LOG.info("Using serializer : " + serializer + " and formatter : " + hiveOutputFormat + + (isCompressed ? " with compression" : "")); + } + // Timeout is chosen to make sure that even if one iteration takes more than // half of the script.timeout but less than script.timeout, we will still // be able to report progress. @@ -1046,26 +1050,13 @@ public void augmentPlan() { public void checkOutputSpecs(FileSystem ignored, JobConf job) throws IOException { if (hiveOutputFormat == null) { + Utilities.copyTableJobPropertiesToConf(conf.getTableInfo(), job); try { - if (getConf().getTableInfo().getJobProperties() != null) { - //Setting only for Storage Handler - if (getConf().getTableInfo().getJobProperties().get(HivePassThroughOutputFormat.HIVE_PASSTHROUGH_STORAGEHANDLER_OF_JOBCONFKEY) != null) { - job.set(HivePassThroughOutputFormat.HIVE_PASSTHROUGH_STORAGEHANDLER_OF_JOBCONFKEY,getConf().getTableInfo().getJobProperties().get(HivePassThroughOutputFormat.HIVE_PASSTHROUGH_STORAGEHANDLER_OF_JOBCONFKEY)); - hiveOutputFormat = ReflectionUtils.newInstance(conf.getTableInfo().getOutputFileFormatClass(),job); - } - else { - hiveOutputFormat = conf.getTableInfo().getOutputFileFormatClass().newInstance(); - } - } - else { - hiveOutputFormat = conf.getTableInfo().getOutputFileFormatClass().newInstance(); - } + hiveOutputFormat = HiveFileFormatUtils.getHiveOutputFormat(job, getConf().getTableInfo()); } catch (Exception ex) { throw new IOException(ex); } } - Utilities.copyTableJobPropertiesToConf(conf.getTableInfo(), job); - if (conf.getTableInfo().isNonNative()) { //check the ouput specs only if it is a storage handler (native tables's outputformats does //not set the job's output properties correctly) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionInfo.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionInfo.java index 074255b..bd00bd4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionInfo.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionInfo.java @@ -47,6 +47,8 @@ private Class tableFunctionResolver; + private boolean blockedFunction; + public FunctionInfo(boolean isNative, String displayName, GenericUDF genericUDF) { this.isNative = isNative; @@ -190,4 +192,13 @@ public boolean isGenericUDTF() { public boolean isTableFunction() { return null != tableFunctionResolver; } + + public boolean isBlockedFunction() { + return blockedFunction; + } + + public void setBlockedFunction(boolean blockedFunction) { + this.blockedFunction = blockedFunction; + } + } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java index 6323387..86f4633 100644 --- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java @@ -29,6 +29,7 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.Set; import java.util.TreeSet; import java.util.regex.Pattern; @@ -41,6 +42,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.metastore.api.Function; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; @@ -148,6 +150,9 @@ import org.w3c.dom.Element; import org.w3c.dom.NodeList; +import com.google.common.base.Splitter; +import com.google.common.collect.Lists; + /** * FunctionRegistry. @@ -668,8 +673,12 @@ public static String getNormalizedFunctionName(String fn) { return functionInfo; } - public static FunctionInfo getFunctionInfo(String functionName) { - return getFunctionInfo(mFunctions, functionName); + public static FunctionInfo getFunctionInfo(String functionName) throws SemanticException { + FunctionInfo functionInfo = getFunctionInfo(mFunctions, functionName); + if (functionInfo != null && functionInfo.isBlockedFunction()) { + throw new SemanticException ("UDF " + functionName + " is not allowed"); + } + return functionInfo; } /** @@ -771,7 +780,13 @@ public static Hive getHive() throws HiveException { public static Set getFunctionSynonyms(String funcName) { Set synonyms = new HashSet(); - FunctionInfo funcInfo = getFunctionInfo(funcName); + FunctionInfo funcInfo; + try { + funcInfo = getFunctionInfo(funcName); + } catch (SemanticException e) { + LOG.warn("Failed to load " + funcName); + funcInfo = null; + } if (null == funcInfo) { return synonyms; } @@ -1246,7 +1261,7 @@ public static void unregisterTemporaryUDF(String functionName) throws HiveExcept } } - public static GenericUDAFResolver getGenericUDAFResolver(String functionName) { + public static GenericUDAFResolver getGenericUDAFResolver(String functionName) throws SemanticException { if (LOG.isDebugEnabled()) { LOG.debug("Looking up GenericUDAF: " + functionName); } @@ -1543,16 +1558,18 @@ public static Method getMethodInternal(Class udfClass, List mlist, bo /** * A shortcut to get the "index" GenericUDF. This is used for getting elements * out of array and getting values out of map. + * @throws SemanticException */ public static GenericUDF getGenericUDFForIndex() { - return FunctionRegistry.getFunctionInfo("index").getGenericUDF(); + return FunctionRegistry.getFunctionInfo(mFunctions, "index").getGenericUDF(); } /** * A shortcut to get the "and" GenericUDF. + * @throws SemanticException */ public static GenericUDF getGenericUDFForAnd() { - return FunctionRegistry.getFunctionInfo("and").getGenericUDF(); + return FunctionRegistry.getFunctionInfo(mFunctions, "and").getGenericUDF(); } /** @@ -1924,8 +1941,9 @@ public static WindowFunctionInfo getWindowFunctionInfo(String functionName) { * name of function * @return true if a GenericUDF or GenericUDAF exists for this name and implyOrder is true, false * otherwise. 
+ * @throws SemanticException */ - public static boolean impliesOrder(String functionName) { + public static boolean impliesOrder(String functionName) throws SemanticException { FunctionInfo info = getFunctionInfo(functionName); if (info != null) { @@ -1951,13 +1969,13 @@ static private void addFunctionInfoToWindowFunctions(String functionName, windowFunctions.put(functionName.toLowerCase(), wInfo); } - public static boolean isTableFunction(String name) + public static boolean isTableFunction(String name) throws SemanticException { FunctionInfo tFInfo = getFunctionInfo(name); return tFInfo != null && !tFInfo.isInternalTableFunction() && tFInfo.isTableFunction(); } - public static TableFunctionResolver getTableFunctionResolver(String name) + public static TableFunctionResolver getTableFunctionResolver(String name) throws SemanticException { FunctionInfo tfInfo = getFunctionInfo(name); if(tfInfo.isTableFunction()) { @@ -1966,7 +1984,7 @@ public static TableFunctionResolver getTableFunctionResolver(String name) return null; } - public static TableFunctionResolver getWindowingTableFunction() + public static TableFunctionResolver getWindowingTableFunction() throws SemanticException { return getTableFunctionResolver(WINDOWING_TABLE_FUNCTION); } @@ -1993,8 +2011,9 @@ public static void registerTableFunction(String name, Class whiteList = Lists.newArrayList( + Splitter.on(",").trimResults().omitEmptyStrings().split(whiteListStr)); + List blackList = Lists.newArrayList( + Splitter.on(",").trimResults().omitEmptyStrings().split(blackListStr)); + + for ( Entry funcEntry : mFunctions.entrySet()) { + funcEntry.getValue().setBlockedFunction( + isUdfBlocked(funcEntry.getKey(), whiteList, blackList)); + } + } + + /** + * Check if the function belongs to whitelist or blacklist + * @param functionName + * @param whiteList + * @param blackList + * @return true if the given udf is to be blocked + */ + private static boolean isUdfBlocked(String functionName, + List whiteList, List blackList) { + boolean inWhiteList = false; + boolean inBlackList = false; + + if (whiteList.isEmpty()) { + // if whitelist is empty, all udfs are allowed + inWhiteList = true; + } else { + for (String allowedFunction : whiteList) { + if (functionName.equalsIgnoreCase(allowedFunction)) { + inWhiteList = true; + break; + } + } + } + + for (String blockedFunction : blackList) { + if (functionName.equalsIgnoreCase(blockedFunction)) { + inBlackList = true; + break; + } + } + + // blacklist setting takes precedence over whitelist + return !inWhiteList || inBlackList; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/SecureCmdDoAs.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/SecureCmdDoAs.java index 832f84f..619aa1f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/SecureCmdDoAs.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/SecureCmdDoAs.java @@ -17,13 +17,18 @@ */ package org.apache.hadoop.hive.ql.exec; +import java.io.File; import java.io.IOException; import java.util.Map; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.security.Credentials; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.security.token.Token; /** * SecureCmdDoAs - Helper class for setting parameters and env necessary for @@ -35,11 +40,23 @@ private final Path tokenPath; public SecureCmdDoAs(HiveConf conf) 
throws HiveException, IOException{ - tokenPath = ShimLoader.getHadoopShims().createDelegationTokenFile(conf); + // Get delegation token for user from filesystem and write the token along with + // metastore tokens into a file + String uname = UserGroupInformation.getLoginUser().getShortUserName(); + FileSystem fs = FileSystem.get(conf); + Token fsToken = fs.getDelegationToken(uname); + + File t = File.createTempFile("hive_hadoop_delegation_token", null); + tokenPath = new Path(t.toURI()); + + //write credential with token to file + Credentials cred = new Credentials(); + cred.addToken(fsToken.getService(), fsToken); + cred.writeTokenStorageFile(tokenPath, conf); } public void addEnv(Map env){ - env.put(ShimLoader.getHadoopShims().getTokenFileLocEnvName(), + env.put(UserGroupInformation.HADOOP_TOKEN_FILE_LOCATION, tokenPath.toUri().getPath()); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/SelectOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/SelectOperator.java index 95d2d76..93017d3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/SelectOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/SelectOperator.java @@ -24,6 +24,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.SelectDesc; import org.apache.hadoop.hive.ql.plan.api.OperatorType; @@ -133,22 +134,61 @@ public boolean acceptLimitPushdown() { * @return if it is an identity select operator or not */ public boolean isIdentitySelect() { - //Safety check + // Safety check if(this.getNumParent() != 1) { return false; } - //Select * - if(this.getConf().isSelStarNoCompute() || - this.getConf().isSelectStar()) { + if(conf.isSelStarNoCompute()) { return true; } - //Check whether the have the same schema - if(!OperatorUtils.sameRowSchema(this, this.getParentOperators().get(0))) { + // Check whether they have the same schema + RowSchema orig = this.getSchema(); + RowSchema dest = this.getParentOperators().get(0).getSchema(); + if(orig.getSignature() == null && dest.getSignature() == null) { + return true; + } + if((orig.getSignature() == null && dest.getSignature() != null) || + (orig.getSignature() != null && dest.getSignature() == null) ) { + return false; + } + + if(orig.getSignature().size() != dest.getSignature().size() || + orig.getSignature().size() != conf.getColList().size()) { return false; } + for(int i=0; i pa Serializer serializer = (Serializer) tableInfo.getDeserializerClass().newInstance(); serializer.initialize(null, tableInfo.getProperties()); outputClass = serializer.getSerializedClass(); - hiveOutputFormat = conf.getTableInfo().getOutputFileFormatClass().newInstance(); + hiveOutputFormat = HiveFileFormatUtils.getHiveOutputFormat(hconf, conf.getTableInfo()); } catch (SerDeException e) { throw new HiveException(e); } catch (InstantiationException e) { @@ -3354,7 +3354,7 @@ public static double getHighestSamplePercentage (MapWork work) { @SuppressWarnings({"rawtypes", "unchecked"}) private static Path createEmptyFile(Path hiveScratchDir, - Class outFileFormat, JobConf job, + HiveOutputFormat outFileFormat, JobConf job, int sequenceNumber, Properties props, boolean dummyRow) throws IOException, InstantiationException, IllegalAccessException { @@ -3370,7 +3370,7 @@ private static Path createEmptyFile(Path hiveScratchDir, String newFile = newDir + 
Path.SEPARATOR + "emptyFile"; Path newFilePath = new Path(newFile); - RecordWriter recWriter = outFileFormat.newInstance().getHiveRecordWriter(job, newFilePath, + RecordWriter recWriter = outFileFormat.getHiveRecordWriter(job, newFilePath, Text.class, false, props, null); if (dummyRow) { // empty files are omitted at CombineHiveInputFormat. @@ -3386,28 +3386,29 @@ private static Path createEmptyFile(Path hiveScratchDir, @SuppressWarnings("rawtypes") private static Path createDummyFileForEmptyPartition(Path path, JobConf job, MapWork work, Path hiveScratchDir, String alias, int sequenceNumber) - throws IOException, InstantiationException, IllegalAccessException { + throws Exception { String strPath = path.toString(); // The input file does not exist, replace it by a empty file PartitionDesc partDesc = work.getPathToPartitionInfo().get(strPath); - boolean nonNative = partDesc.getTableDesc().isNonNative(); - boolean oneRow = partDesc.getInputFileFormatClass() == OneNullRowInputFormat.class; - Properties props = SerDeUtils.createOverlayedProperties( - partDesc.getTableDesc().getProperties(), partDesc.getProperties()); - Class outFileFormat = partDesc.getOutputFileFormatClass(); - - if (nonNative) { + if (partDesc.getTableDesc().isNonNative()) { // if this isn't a hive table we can't create an empty file for it. return path; } + Properties props = SerDeUtils.createOverlayedProperties( + partDesc.getTableDesc().getProperties(), partDesc.getProperties()); + HiveOutputFormat outFileFormat = HiveFileFormatUtils.getHiveOutputFormat(job, partDesc); + + boolean oneRow = partDesc.getInputFileFormatClass() == OneNullRowInputFormat.class; + Path newPath = createEmptyFile(hiveScratchDir, outFileFormat, job, sequenceNumber, props, oneRow); - - LOG.info("Changed input file to " + newPath); + if (LOG.isInfoEnabled()) { + LOG.info("Changed input file " + strPath + " to empty file " + newPath); + } // update the work String strNewPath = newPath.toString(); @@ -3429,23 +3430,23 @@ private static Path createDummyFileForEmptyPartition(Path path, JobConf job, Map @SuppressWarnings("rawtypes") private static Path createDummyFileForEmptyTable(JobConf job, MapWork work, Path hiveScratchDir, String alias, int sequenceNumber) - throws IOException, InstantiationException, IllegalAccessException { + throws Exception { TableDesc tableDesc = work.getAliasToPartnInfo().get(alias).getTableDesc(); - Properties props = tableDesc.getProperties(); - boolean nonNative = tableDesc.isNonNative(); - Class outFileFormat = tableDesc.getOutputFileFormatClass(); - - if (nonNative) { + if (tableDesc.isNonNative()) { // if this isn't a hive table we can't create an empty file for it. 
return null; } + Properties props = tableDesc.getProperties(); + HiveOutputFormat outFileFormat = HiveFileFormatUtils.getHiveOutputFormat(job, tableDesc); + Path newPath = createEmptyFile(hiveScratchDir, outFileFormat, job, sequenceNumber, props, false); - - LOG.info("Changed input file to " + newPath.toString()); + if (LOG.isInfoEnabled()) { + LOG.info("Changed input file for alias " + alias + " to " + newPath); + } // update the work diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/TaskLogProcessor.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/TaskLogProcessor.java index 12433ca..d124f09 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/TaskLogProcessor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/TaskLogProcessor.java @@ -34,6 +34,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.http.HtmlQuoting; import org.apache.hadoop.mapred.JobConf; /** @@ -215,8 +216,7 @@ void addErrorAndSolution(ErrorAndSolution e) { break; } - inputLine = - ShimLoader.getHadoopShims().unquoteHtmlChars(inputLine); + inputLine = HtmlQuoting.unquoteHtmlChars(inputLine); if (stackTracePattern.matcher(inputLine).matches() || endStackTracePattern.matcher(inputLine).matches()) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java index 18e40b3..2227e6f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java @@ -59,6 +59,7 @@ import org.apache.hadoop.hive.ql.exec.tez.TezSessionState; import org.apache.hadoop.hive.ql.exec.tez.TezSessionPoolManager; import org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; +import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils; import org.apache.hadoop.hive.ql.io.HiveKey; import org.apache.hadoop.hive.ql.io.HiveOutputFormatImpl; import org.apache.hadoop.hive.ql.io.IOPrepareCache; @@ -85,6 +86,7 @@ import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.Partitioner; import org.apache.hadoop.mapred.RunningJob; +import org.apache.hadoop.security.UserGroupInformation; import org.apache.log4j.Appender; import org.apache.log4j.BasicConfigurator; import org.apache.log4j.FileAppender; @@ -227,7 +229,7 @@ public int execute(DriverContext driverContext) { return 5; } - ShimLoader.getHadoopShims().prepareJobOutput(job); + HiveFileFormatUtils.prepareJobOutput(job); //See the javadoc on HiveOutputFormatImpl and HadoopShims.prepareJobOutput() job.setOutputFormat(HiveOutputFormatImpl.class); @@ -276,9 +278,6 @@ public int execute(DriverContext driverContext) { useSpeculativeExecReducers); String inpFormat = HiveConf.getVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT); - if ((inpFormat == null) || (!StringUtils.isNotBlank(inpFormat))) { - inpFormat = ShimLoader.getHadoopShims().getInputFormatClassName(); - } if (mWork.isUseBucketizedHiveInputFormat()) { inpFormat = BucketizedHiveInputFormat.class.getName(); @@ -665,9 +664,8 @@ public static void main(String[] args) throws IOException, HiveException { conf.set("tmpfiles", files); } - if(ShimLoader.getHadoopShims().isSecurityEnabled()){ - String hadoopAuthToken = - System.getenv(ShimLoader.getHadoopShims().getTokenFileLocEnvName()); + if(UserGroupInformation.isSecurityEnabled()){ + String hadoopAuthToken = System.getenv(UserGroupInformation.HADOOP_TOKEN_FILE_LOCATION); if(hadoopAuthToken != null){ 
conf.set("mapreduce.job.credentials.binary", hadoopAuthToken); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/HadoopJobExecHelper.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/HadoopJobExecHelper.java index cda0629..6a6593c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/HadoopJobExecHelper.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/HadoopJobExecHelper.java @@ -231,7 +231,7 @@ private MapRedStats progress(ExecDriverTaskHandle th) throws IOException { int numMap = -1; int numReduce = -1; List clientStatPublishers = getClientStatPublishers(); - + final boolean localMode = ShimLoader.getHadoopShims().isLocalMode(job); Heartbeater heartbeater = new Heartbeater(th.getTxnManager(), job); while (!rj.isComplete()) { @@ -250,51 +250,53 @@ private MapRedStats progress(ExecDriverTaskHandle th) throws IOException { initializing = false; } - if (!initOutputPrinted) { - SessionState ss = SessionState.get(); + if (!localMode) { + if (!initOutputPrinted) { + SessionState ss = SessionState.get(); - String logMapper; - String logReducer; + String logMapper; + String logReducer; - TaskReport[] mappers = jc.getMapTaskReports(rj.getID()); - if (mappers == null) { - logMapper = "no information for number of mappers; "; - } else { - numMap = mappers.length; - if (ss != null) { - ss.getHiveHistory().setTaskProperty(SessionState.get().getQueryId(), getId(), - Keys.TASK_NUM_MAPPERS, Integer.toString(numMap)); + TaskReport[] mappers = jc.getMapTaskReports(rj.getID()); + if (mappers == null) { + logMapper = "no information for number of mappers; "; + } else { + numMap = mappers.length; + if (ss != null) { + ss.getHiveHistory().setTaskProperty(SessionState.get().getQueryId(), getId(), + Keys.TASK_NUM_MAPPERS, Integer.toString(numMap)); + } + logMapper = "number of mappers: " + numMap + "; "; } - logMapper = "number of mappers: " + numMap + "; "; - } - TaskReport[] reducers = jc.getReduceTaskReports(rj.getID()); - if (reducers == null) { - logReducer = "no information for number of reducers. "; - } else { - numReduce = reducers.length; - if (ss != null) { - ss.getHiveHistory().setTaskProperty(SessionState.get().getQueryId(), getId(), - Keys.TASK_NUM_REDUCERS, Integer.toString(numReduce)); + TaskReport[] reducers = jc.getReduceTaskReports(rj.getID()); + if (reducers == null) { + logReducer = "no information for number of reducers. "; + } else { + numReduce = reducers.length; + if (ss != null) { + ss.getHiveHistory().setTaskProperty(SessionState.get().getQueryId(), getId(), + Keys.TASK_NUM_REDUCERS, Integer.toString(numReduce)); + } + logReducer = "number of reducers: " + numReduce; } - logReducer = "number of reducers: " + numReduce; - } - console - .printInfo("Hadoop job information for " + getId() + ": " + logMapper + logReducer); - initOutputPrinted = true; - } + console + .printInfo("Hadoop job information for " + getId() + ": " + logMapper + logReducer); + initOutputPrinted = true; + } - RunningJob newRj = jc.getJob(rj.getID()); - if (newRj == null) { - // under exceptional load, hadoop may not be able to look up status - // of finished jobs (because it has purged them from memory). From - // hive's perspective - it's equivalent to the job having failed. 
- // So raise a meaningful exception - throw new IOException("Could not find status of job:" + rj.getID()); - } else { - th.setRunningJob(newRj); - rj = newRj; + RunningJob newRj = jc.getJob(rj.getID()); + if (newRj == null) { + // under exceptional load, hadoop may not be able to look up status + // of finished jobs (because it has purged them from memory). From + // hive's perspective - it's equivalent to the job having failed. + // So raise a meaningful exception + throw new IOException("Could not find status of job:" + rj.getID()); + } else { + th.setRunningJob(newRj); + rj = newRj; + } } // If fatal errors happen we should kill the job immediately rather than diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapredLocalTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapredLocalTask.java index d0c022b..e4f910d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapredLocalTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapredLocalTask.java @@ -66,7 +66,9 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.shims.HadoopShims; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.ReflectionUtils; import org.apache.hive.common.util.StreamPrinter; @@ -237,8 +239,7 @@ public int executeInChildVM(DriverContext driverContext) { //Set HADOOP_USER_NAME env variable for child process, so that // it also runs with hadoop permissions for the user the job is running as // This will be used by hadoop only in unsecure(/non kerberos) mode - HadoopShims shim = ShimLoader.getHadoopShims(); - String endUserName = shim.getShortUserName(shim.getUGIForConf(job)); + String endUserName = Utils.getUGIForConf(job).getShortUserName(); LOG.debug("setting HADOOP_USER_NAME\t" + endUserName); variables.put("HADOOP_USER_NAME", endUserName); @@ -265,8 +266,8 @@ public int executeInChildVM(DriverContext driverContext) { } - if(ShimLoader.getHadoopShims().isSecurityEnabled() && - ShimLoader.getHadoopShims().isLoginKeytabBased()) { + if(UserGroupInformation.isSecurityEnabled() && + UserGroupInformation.isLoginKeytabBased()) { //If kerberos security is enabled, and HS2 doAs is enabled, // then additional params need to be set so that the command is run as // intended user diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/RowContainer.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/RowContainer.java index 5271e91..b07fdcf 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/RowContainer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/RowContainer.java @@ -523,7 +523,7 @@ protected void setupWriter() throws HiveException { tmpFile.deleteOnExit(); // rFile = new RandomAccessFile(tmpFile, "rw"); - HiveOutputFormat hiveOutputFormat = tblDesc.getOutputFileFormatClass().newInstance(); + HiveOutputFormat hiveOutputFormat = HiveFileFormatUtils.getHiveOutputFormat(jc, tblDesc); tempOutPath = new Path(tmpFile.toString()); JobConf localJc = getLocalFSJobConfClone(jc); rw = HiveFileFormatUtils.getRecordWriter(this.jobCloneUsingLocalFs, diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java index 6ef196d..438efab 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java +++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java @@ -156,9 +156,6 @@ private Class getInputFormat(JobConf jobConf, MapWork mWork) throws HiveExceptio } String inpFormat = HiveConf.getVar(jobConf, HiveConf.ConfVars.HIVEINPUTFORMAT); - if ((inpFormat == null) || (StringUtils.isBlank(inpFormat))) { - inpFormat = ShimLoader.getHadoopShims().getInputFormatClassName(); - } if (mWork.isUseBucketizedHiveInputFormat()) { inpFormat = BucketizedHiveInputFormat.class.getName(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/session/SparkSessionManagerImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/session/SparkSessionManagerImpl.java index c0a29df..d77866a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/session/SparkSessionManagerImpl.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/session/SparkSessionManagerImpl.java @@ -23,6 +23,7 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.security.UserGroupInformation; import java.util.Collections; @@ -133,12 +134,11 @@ public SparkSession getSession(SparkSession existingSession, HiveConf conf, */ private boolean canReuseSession(SparkSession existingSession, HiveConf conf) throws HiveException { try { - UserGroupInformation newUgi = ShimLoader.getHadoopShims().getUGIForConf(conf); - String newUserName = ShimLoader.getHadoopShims().getShortUserName(newUgi); + UserGroupInformation newUgi = Utils.getUGIForConf(conf); + String newUserName = newUgi.getShortUserName(); - UserGroupInformation ugiInSession = - ShimLoader.getHadoopShims().getUGIForConf(existingSession.getConf()); - String userNameInSession = ShimLoader.getHadoopShims().getShortUserName(ugiInSession); + UserGroupInformation ugiInSession = Utils.getUGIForConf(existingSession.getConf()); + String userNameInSession = ugiInSession.getShortUserName(); return newUserName.equals(userNameInSession); } catch(Exception ex) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java index 0e326cf..ed68141 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java @@ -20,7 +20,9 @@ import com.google.common.base.Function; import com.google.common.collect.Iterators; import com.google.common.collect.Lists; + import javax.security.auth.login.LoginException; + import java.io.FileNotFoundException; import java.io.IOException; import java.net.URI; @@ -54,6 +56,7 @@ import org.apache.hadoop.hive.ql.exec.tez.tools.TezMergedLogicalInput; import org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; +import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils.NullOutputCommitter; import org.apache.hadoop.hive.ql.io.HiveInputFormat; import org.apache.hadoop.hive.ql.io.HiveKey; import org.apache.hadoop.hive.ql.io.HiveOutputFormatImpl; @@ -72,8 +75,7 @@ import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.ql.stats.StatsFactory; import org.apache.hadoop.hive.ql.stats.StatsPublisher; -import org.apache.hadoop.hive.shims.HadoopShimsSecure.NullOutputCommitter; -import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.DataOutputBuffer; 
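Many hunks above and below collapse the getUGIForConf/getShortUserName shim pair into direct UGI calls, so the session-reuse checks become a one-line comparison. A sketch, assuming the two UGIs come from Utils.getUGIForConf on the respective configurations:

import org.apache.hadoop.security.UserGroupInformation;

public class SessionUserSketch {
  // A cached session may be reused only if it belongs to the same short
  // user name as the incoming request; otherwise start a fresh session.
  static boolean sameUser(UserGroupInformation existing, UserGroupInformation incoming) {
    return existing.getShortUserName().equals(incoming.getShortUserName());
  }
}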
import org.apache.hadoop.mapred.FileOutputFormat; @@ -203,9 +205,6 @@ private JobConf initializeVertexConf(JobConf baseConf, Context context, MapWork Utilities.setInputAttributes(conf, mapWork); String inpFormat = HiveConf.getVar(conf, HiveConf.ConfVars.HIVETEZINPUTFORMAT); - if ((inpFormat == null) || (!StringUtils.isNotBlank(inpFormat))) { - inpFormat = ShimLoader.getHadoopShims().getInputFormatClassName(); - } if (mapWork.isUseBucketizedHiveInputFormat()) { inpFormat = BucketizedHiveInputFormat.class.getName(); @@ -761,8 +760,8 @@ public PreWarmVertex createPreWarmVertex(TezConfiguration conf, */ @SuppressWarnings("deprecation") public Path getDefaultDestDir(Configuration conf) throws LoginException, IOException { - UserGroupInformation ugi = ShimLoader.getHadoopShims().getUGIForConf(conf); - String userName = ShimLoader.getHadoopShims().getShortUserName(ugi); + UserGroupInformation ugi = Utils.getUGIForConf(conf); + String userName = ugi.getShortUserName(); String userPathStr = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_USER_INSTALL_DIR); Path userPath = new Path(userPathStr); FileSystem fs = userPath.getFileSystem(conf); @@ -1125,8 +1124,8 @@ public Path createTezDir(Path scratchDir, Configuration conf) UserGroupInformation ugi; String userName = System.getProperty("user.name"); try { - ugi = ShimLoader.getHadoopShims().getUGIForConf(conf); - userName = ShimLoader.getHadoopShims().getShortUserName(ugi); + ugi = Utils.getUGIForConf(conf); + userName = ugi.getShortUserName(); } catch (LoginException e) { throw new IOException(e); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezJobMonitor.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezJobMonitor.java index dea3460..3de5556 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezJobMonitor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezJobMonitor.java @@ -62,7 +62,7 @@ import java.util.SortedSet; import java.util.TreeSet; -import jline.Terminal; +import jline.TerminalFactory; /** * TezJobMonitor keeps track of a tez job while it's being executed. 
It will @@ -232,7 +232,7 @@ public void repositionCursor() { * @return - width of terminal */ public int getTerminalWidth() { - return Terminal.getTerminal().getTerminalWidth(); + return TerminalFactory.get().getWidth(); } /** diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionPoolManager.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionPoolManager.java index e5fce14..9f6c31c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionPoolManager.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionPoolManager.java @@ -27,6 +27,7 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.security.UserGroupInformation; /** @@ -212,8 +213,8 @@ private boolean canWorkWithSameSession(TezSessionState session, HiveConf conf) } try { - UserGroupInformation ugi = ShimLoader.getHadoopShims().getUGIForConf(conf); - String userName = ShimLoader.getHadoopShims().getShortUserName(ugi); + UserGroupInformation ugi = Utils.getUGIForConf(conf); + String userName = ugi.getShortUserName(); LOG.info("The current user: " + userName + ", session user: " + session.getUser()); if (userName.equals(session.getUser()) == false) { LOG.info("Different users incoming: " + userName + " existing: " + session.getUser()); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java index 65a0090..3d01142 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java @@ -46,6 +46,7 @@ import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.yarn.api.records.LocalResource; import org.apache.hadoop.yarn.api.records.LocalResourceType; @@ -136,9 +137,8 @@ public void open(HiveConf conf, String[] additionalFiles) this.queueName = conf.get("tez.queue.name"); this.doAsEnabled = conf.getBoolVar(HiveConf.ConfVars.HIVE_SERVER2_ENABLE_DOAS); - UserGroupInformation ugi; - ugi = ShimLoader.getHadoopShims().getUGIForConf(conf); - user = ShimLoader.getHadoopShims().getShortUserName(ugi); + UserGroupInformation ugi = Utils.getUGIForConf(conf); + user = ugi.getShortUserName(); LOG.info("User of session id " + sessionId + " is " + user); // create the tez tmp dir diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index dbc9bb2..54b935f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -82,6 +82,7 @@ import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor; import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFArgDesc; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.AggregationDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; @@ -756,7 +757,13 @@ public static boolean 
isCustomUDF(ExprNodeGenericFuncDesc expr) { if (udfName == null) { return false; } - FunctionInfo funcInfo = FunctionRegistry.getFunctionInfo(udfName); + FunctionInfo funcInfo; + try { + funcInfo = FunctionRegistry.getFunctionInfo(udfName); + } catch (SemanticException e) { + LOG.warn("Failed to load " + udfName, e); + funcInfo = null; + } if (funcInfo == null) { return false; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/hooks/HookContext.java b/ql/src/java/org/apache/hadoop/hive/ql/hooks/HookContext.java index 260444f..123bb5c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/hooks/HookContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/hooks/HookContext.java @@ -30,6 +30,7 @@ import org.apache.hadoop.hive.ql.exec.TaskRunner; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.security.UserGroupInformation; /** * Hook Context keeps all the necessary information for all the hooks. @@ -61,7 +62,7 @@ public HookContext(QueryPlan queryPlan, HiveConf conf, completeTaskList = new ArrayList(); inputs = queryPlan.getInputs(); outputs = queryPlan.getOutputs(); - ugi = ShimLoader.getHadoopShims().getUGIForConf(conf); + ugi = Utils.getUGIForConf(conf); linfo= null; if(SessionState.get() != null){ linfo = SessionState.get().getLineageState().getLineageInfo(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputSplit.java b/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputSplit.java index 481deba..6f30c9d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputSplit.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputSplit.java @@ -20,43 +20,12 @@ import java.io.DataInput; import java.io.DataOutput; -import java.io.File; import java.io.IOException; -import java.io.Serializable; -import java.net.URI; -import java.net.URISyntaxException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Iterator; -import java.util.Map; -import java.util.Map.Entry; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configurable; -import org.apache.hadoop.conf.Configuration; + import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.PathFilter; -import org.apache.hadoop.hive.ql.exec.Operator; -import org.apache.hadoop.hive.ql.exec.TableScanOperator; -import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.io.HiveInputFormat.HiveInputSplit; -import org.apache.hadoop.hive.ql.plan.MapredWork; -import org.apache.hadoop.hive.ql.plan.PartitionDesc; -import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; -import org.apache.hadoop.hive.shims.ShimLoader; -import org.apache.hadoop.hive.shims.HadoopShims.CombineFileInputFormatShim; -import org.apache.hadoop.hive.shims.HadoopShims.InputSplitShim; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.mapred.FileInputFormat; import org.apache.hadoop.mapred.FileSplit; -import org.apache.hadoop.mapred.InputFormat; import org.apache.hadoop.mapred.InputSplit; -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapred.JobConfigurable; -import org.apache.hadoop.mapred.RecordReader; -import org.apache.hadoop.mapred.Reporter; import org.apache.hadoop.util.ReflectionUtils; /** @@ -101,6 +70,7 @@ public InputSplit getSplit(int idx) { return inputSplits[idx]; } + @Override public String 
inputFormatClassName() { return inputFormatClassName; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java index 35db50c..7757367 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java @@ -41,15 +41,13 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.Utilities; -import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; -import org.apache.hadoop.hive.ql.io.orc.OrcRecordUpdater; import org.apache.hadoop.hive.ql.log.PerfLogger; import org.apache.hadoop.hive.ql.parse.SplitSample; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.PartitionDesc; import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.shims.HadoopShims.CombineFileInputFormatShim; -import org.apache.hadoop.hive.shims.HadoopShims.InputSplitShim; +import org.apache.hadoop.hive.shims.HadoopShimsSecure.InputSplitShim; import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.WritableComparable; @@ -61,6 +59,7 @@ import org.apache.hadoop.mapred.RecordReader; import org.apache.hadoop.mapred.Reporter; import org.apache.hadoop.mapred.TextInputFormat; +import org.apache.hadoop.mapred.lib.CombineFileSplit; /** @@ -81,21 +80,21 @@ * from different files. Since, they belong to a single directory, there is a * single inputformat for all the chunks. */ - public static class CombineHiveInputSplit implements InputSplitShim { + public static class CombineHiveInputSplit extends InputSplitShim { String inputFormatClassName; - InputSplitShim inputSplitShim; + CombineFileSplit inputSplitShim; public CombineHiveInputSplit() throws IOException { this(ShimLoader.getHadoopShims().getCombineFileInputFormat() .getInputSplitShim()); } - public CombineHiveInputSplit(InputSplitShim inputSplitShim) throws IOException { + public CombineHiveInputSplit(CombineFileSplit inputSplitShim) throws IOException { this(inputSplitShim.getJob(), inputSplitShim); } - public CombineHiveInputSplit(JobConf job, InputSplitShim inputSplitShim) + public CombineHiveInputSplit(JobConf job, CombineFileSplit inputSplitShim) throws IOException { this.inputSplitShim = inputSplitShim; if (job != null) { @@ -114,7 +113,7 @@ public CombineHiveInputSplit(JobConf job, InputSplitShim inputSplitShim) } } - public InputSplitShim getInputSplitShim() { + public CombineFileSplit getInputSplitShim() { return inputSplitShim; } @@ -129,50 +128,60 @@ public void setInputFormatClassName(String inputFormatClassName) { this.inputFormatClassName = inputFormatClassName; } + @Override public JobConf getJob() { return inputSplitShim.getJob(); } + @Override public long getLength() { return inputSplitShim.getLength(); } /** Returns an array containing the startoffsets of the files in the split. */ + @Override public long[] getStartOffsets() { return inputSplitShim.getStartOffsets(); } /** Returns an array containing the lengths of the files in the split. */ + @Override public long[] getLengths() { return inputSplitShim.getLengths(); } /** Returns the start offset of the ith Path. */ + @Override public long getOffset(int i) { return inputSplitShim.getOffset(i); } /** Returns the length of the ith Path. 
*/ + @Override public long getLength(int i) { return inputSplitShim.getLength(i); } /** Returns the number of Paths in the split. */ + @Override public int getNumPaths() { return inputSplitShim.getNumPaths(); } /** Returns the ith Path. */ + @Override public Path getPath(int i) { return inputSplitShim.getPath(i); } /** Returns all the Paths in the split. */ + @Override public Path[] getPaths() { return inputSplitShim.getPaths(); } /** Returns all the Paths where this input-split resides. */ + @Override public String[] getLocations() throws IOException { return inputSplitShim.getLocations(); } @@ -192,6 +201,7 @@ public String toString() { /** * Writable interface. */ + @Override public void readFields(DataInput in) throws IOException { inputSplitShim.readFields(in); inputFormatClassName = in.readUTF(); @@ -200,6 +210,7 @@ public void readFields(DataInput in) throws IOException { /** * Writable interface. */ + @Override public void write(DataOutput out) throws IOException { inputSplitShim.write(out); @@ -219,11 +230,6 @@ public void write(DataOutput out) throws IOException { out.writeUTF(inputFormatClassName); } - - @Override - public void shrinkSplit(long length) { - inputSplitShim.shrinkSplit(length); - } } // Splits are not shared across different partitions with different input formats. @@ -245,9 +251,6 @@ public CombinePathInputFormat(List> opList, public boolean equals(Object o) { if (o instanceof CombinePathInputFormat) { CombinePathInputFormat mObj = (CombinePathInputFormat) o; - if (mObj == null) { - return false; - } return (opList.equals(mObj.opList)) && (inputFormatClassName.equals(mObj.inputFormatClassName)) && (deserializerClassName == null ? (mObj.deserializerClassName == null) : @@ -410,7 +413,7 @@ public int hashCode() { } // Processing directories - List iss = new ArrayList(); + List iss = new ArrayList(); if (!mrwork.isMapperCannotSpanPartns()) { //mapper can span partitions //combine into as few as one split, subject to the PathFilters set @@ -434,7 +437,7 @@ public int hashCode() { iss = sampleSplits(iss); } - for (InputSplitShim is : iss) { + for (CombineFileSplit is : iss) { CombineHiveInputSplit csplit = new CombineHiveInputSplit(job, is); result.add(csplit); } @@ -450,9 +453,6 @@ public int hashCode() { @Override public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException { init(job); - Map> pathToAliases = mrwork.getPathToAliases(); - Map> aliasToWork = - mrwork.getAliasToWork(); ArrayList result = new ArrayList(); @@ -469,9 +469,8 @@ public int hashCode() { IOPrepareCache.get().allocatePartitionDescMap()); // Use HiveInputFormat if any of the paths is not splittable - Class inputFormatClass = part.getInputFileFormatClass(); - String inputFormatClassName = inputFormatClass.getName(); - InputFormat inputFormat = getInputFormatFromCache(inputFormatClass, job); + Class inputFormatClass = part.getInputFileFormatClass(); + InputFormat inputFormat = getInputFormatFromCache(inputFormatClass, job); if (inputFormat instanceof AvoidSplitCombination && ((AvoidSplitCombination) inputFormat).shouldSkipCombine(path, job)) { if (LOG.isDebugEnabled()) { @@ -523,7 +522,7 @@ public int hashCode() { } private void processPaths(JobConf job, CombineFileInputFormatShim combine, - List iss, Path... path) throws IOException { + List iss, Path... 
path) throws IOException { JobConf currJob = new JobConf(job); FileInputFormat.setInputPaths(currJob, path); iss.addAll(Arrays.asList(combine.getSplits(currJob, 1))); @@ -540,16 +539,16 @@ private void processPaths(JobConf job, CombineFileInputFormatShim combine, * @param splits * @return the sampled splits */ - private List sampleSplits(List splits) { + private List sampleSplits(List splits) { HashMap nameToSamples = mrwork.getNameToSplitSample(); - List retLists = new ArrayList(); - Map> aliasToSplitList = new HashMap>(); + List retLists = new ArrayList(); + Map> aliasToSplitList = new HashMap>(); Map> pathToAliases = mrwork.getPathToAliases(); Map> pathToAliasesNoScheme = removeScheme(pathToAliases); // Populate list of exclusive splits for every sampled alias // - for (InputSplitShim split : splits) { + for (CombineFileSplit split : splits) { String alias = null; for (Path path : split.getPaths()) { boolean schemeless = path.toUri().getScheme() == null; @@ -571,7 +570,7 @@ private void processPaths(JobConf job, CombineFileInputFormatShim combine, // split exclusively serves alias, which needs to be sampled // add it to the split list of the alias. if (!aliasToSplitList.containsKey(alias)) { - aliasToSplitList.put(alias, new ArrayList()); + aliasToSplitList.put(alias, new ArrayList()); } aliasToSplitList.get(alias).add(split); } else { @@ -583,10 +582,10 @@ private void processPaths(JobConf job, CombineFileInputFormatShim combine, // for every sampled alias, we figure out splits to be sampled and add // them to return list // - for (Map.Entry> entry: aliasToSplitList.entrySet()) { - ArrayList splitList = entry.getValue(); + for (Map.Entry> entry: aliasToSplitList.entrySet()) { + ArrayList splitList = entry.getValue(); long totalSize = 0; - for (InputSplitShim split : splitList) { + for (CombineFileSplit split : splitList) { totalSize += split.getLength(); } @@ -596,13 +595,13 @@ private void processPaths(JobConf job, CombineFileInputFormatShim combine, int startIndex = splitSample.getSeedNum() % splitList.size(); long size = 0; for (int i = 0; i < splitList.size(); i++) { - InputSplitShim split = splitList.get((startIndex + i) % splitList.size()); + CombineFileSplit split = splitList.get((startIndex + i) % splitList.size()); retLists.add(split); long splitgLength = split.getLength(); if (size + splitgLength >= targetSize) { LOG.info("Sample alias " + entry.getValue() + " using " + (i + 1) + "splits"); if (size + splitgLength > targetSize) { - split.shrinkSplit(targetSize - size); + ((InputSplitShim)split).shrinkSplit(targetSize - size); } break; } @@ -674,6 +673,7 @@ public void addPath(Path p) { // returns true if the specified path matches the prefix stored // in this TestFilter. 
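// A minimal, self-contained sketch (hypothetical names, not part of the patch)
// of the sampling loop above: whole splits are accumulated until targetSize
// bytes are reached, and the split that crosses the boundary is shrunk to the
// remainder, mirroring shrinkSplit(targetSize - size).
import java.util.ArrayList;
import java.util.List;

class SplitSampleSketch {
  static List<Long> sampleLengths(List<Long> splitLengths, long targetSize, int startIndex) {
    List<Long> sampled = new ArrayList<Long>();
    long size = 0;
    for (int i = 0; i < splitLengths.size(); i++) {
      long len = splitLengths.get((startIndex + i) % splitLengths.size());
      if (size + len >= targetSize) {
        sampled.add(Math.min(len, targetSize - size)); // shrink the last split
        break;
      }
      sampled.add(len);
      size += len;
    }
    return sampled;
  }
}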
+ @Override public boolean accept(Path path) { boolean find = false; while (path != null && !find) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveRecordReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveRecordReader.java index 6318b2f..5ac9f85 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveRecordReader.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveRecordReader.java @@ -23,7 +23,6 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.ql.exec.mr.ExecMapper; import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat.CombineHiveInputSplit; -import org.apache.hadoop.hive.shims.HadoopShims.InputSplitShim; import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.WritableComparable; import org.apache.hadoop.mapred.FileSplit; @@ -31,6 +30,7 @@ import org.apache.hadoop.mapred.InputSplit; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.Reporter; +import org.apache.hadoop.mapred.lib.CombineFileSplit; /** * CombineHiveRecordReader. @@ -45,7 +45,7 @@ public CombineHiveRecordReader(InputSplit split, Configuration conf, Reporter reporter, Integer partition) throws IOException { super((JobConf)conf); CombineHiveInputSplit hsplit = new CombineHiveInputSplit(jobConf, - (InputSplitShim) split); + (CombineFileSplit) split); String inputFormatClassName = hsplit.inputFormatClassName(); Class inputFormatClass = null; try { @@ -72,14 +72,17 @@ public void doClose() throws IOException { recordReader.close(); } + @Override public K createKey() { return (K) recordReader.createKey(); } + @Override public V createValue() { return (V) recordReader.createValue(); } + @Override public long getPos() throws IOException { return recordReader.getPos(); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java index 2ac60c0..e2ae25b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java @@ -19,7 +19,6 @@ package org.apache.hadoop.hive.ql.io; import java.io.IOException; -import java.io.Serializable; import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -27,7 +26,9 @@ import java.util.Map.Entry; import java.util.Properties; import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -35,23 +36,26 @@ import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.Utilities; -import org.apache.hadoop.hive.ql.io.HivePassThroughOutputFormat; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.FileSinkDesc; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.PartitionDesc; import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.io.SequenceFile.CompressionType; import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.mapred.FileOutputFormat; import org.apache.hadoop.mapred.InputFormat; import org.apache.hadoop.mapred.JobConf; +import 
org.apache.hadoop.mapred.JobContext; +import org.apache.hadoop.mapred.OutputCommitter; import org.apache.hadoop.mapred.OutputFormat; import org.apache.hadoop.mapred.Reporter; import org.apache.hadoop.mapred.SequenceFileInputFormat; import org.apache.hadoop.mapred.SequenceFileOutputFormat; +import org.apache.hadoop.mapred.TaskAttemptContext; import org.apache.hadoop.mapred.TextInputFormat; import org.apache.hadoop.util.Shell; import org.apache.hadoop.util.ReflectionUtils; @@ -67,23 +71,16 @@ static { outputFormatSubstituteMap = - new HashMap, Class>(); + new ConcurrentHashMap, Class>(); HiveFileFormatUtils.registerOutputFormatSubstitute( IgnoreKeyTextOutputFormat.class, HiveIgnoreKeyTextOutputFormat.class); HiveFileFormatUtils.registerOutputFormatSubstitute( SequenceFileOutputFormat.class, HiveSequenceFileOutputFormat.class); } - private static ThreadLocal tRealOutputFormat = new ThreadLocal() { - @Override - protected String initialValue() { - return null; - } - }; - @SuppressWarnings("unchecked") - private static Map, Class> - outputFormatSubstituteMap; + private static Map, Class> + outputFormatSubstituteMap; /** * register a substitute. @@ -93,8 +90,7 @@ protected String initialValue() { * @param substitute */ @SuppressWarnings("unchecked") - public static synchronized void registerOutputFormatSubstitute( - Class origin, + public static void registerOutputFormatSubstitute(Class origin, Class substitute) { outputFormatSubstituteMap.put(origin, substitute); } @@ -103,44 +99,19 @@ public static synchronized void registerOutputFormatSubstitute( * get a OutputFormat's substitute HiveOutputFormat. */ @SuppressWarnings("unchecked") - public static synchronized Class getOutputFormatSubstitute( - Class origin, boolean storagehandlerflag) { - if (HiveOutputFormat.class.isAssignableFrom(origin)) { - return (Class) origin; + public static Class getOutputFormatSubstitute( + Class origin) { + if (origin == null || HiveOutputFormat.class.isAssignableFrom(origin)) { + return (Class) origin; // hive native } - Class result = outputFormatSubstituteMap - .get(origin); - if ((storagehandlerflag == true) && (result == null || result == HivePassThroughOutputFormat.class)) { - HiveFileFormatUtils.setRealOutputFormatClassName(origin.getName()); - result = HivePassThroughOutputFormat.class; + Class substitute = outputFormatSubstituteMap.get(origin); + if (substitute != null) { + return substitute; // substituted } - return result; - } - - /** - * get a RealOutputFormatClassName corresponding to the HivePassThroughOutputFormat - */ - @SuppressWarnings("unchecked") - public static String getRealOutputFormatClassName() - { - return tRealOutputFormat.get(); + return (Class) origin; } /** - * set a RealOutputFormatClassName corresponding to the HivePassThroughOutputFormat - */ - public static void setRealOutputFormatClassName( - String destination) { - if (destination != null){ - tRealOutputFormat.set(destination); - } - else { - return; - } - } - - - /** * get the final output path of a given FileOutputFormat. 
* * @param parent @@ -279,39 +250,34 @@ public static RecordWriter getHiveRecordWriter(JobConf jc, } public static RecordWriter getRecordWriter(JobConf jc, - HiveOutputFormat hiveOutputFormat, - final Class valueClass, boolean isCompressed, + OutputFormat outputFormat, + Class valueClass, boolean isCompressed, Properties tableProp, Path outPath, Reporter reporter ) throws IOException, HiveException { - if (hiveOutputFormat != null) { - return hiveOutputFormat.getHiveRecordWriter(jc, outPath, valueClass, - isCompressed, tableProp, reporter); + if (!(outputFormat instanceof HiveOutputFormat)) { + outputFormat = new HivePassThroughOutputFormat(outputFormat); } - return null; + return ((HiveOutputFormat)outputFormat).getHiveRecordWriter( + jc, outPath, valueClass, isCompressed, tableProp, reporter); } - private static HiveOutputFormat getHiveOutputFormat(JobConf jc, TableDesc tableInfo) + public static HiveOutputFormat getHiveOutputFormat(Configuration conf, TableDesc tableDesc) throws HiveException { - boolean storagehandlerofhivepassthru = false; - HiveOutputFormat hiveOutputFormat; - try { - if (tableInfo.getJobProperties() != null) { - if (tableInfo.getJobProperties().get( - HivePassThroughOutputFormat.HIVE_PASSTHROUGH_STORAGEHANDLER_OF_JOBCONFKEY) != null) { - jc.set(HivePassThroughOutputFormat.HIVE_PASSTHROUGH_STORAGEHANDLER_OF_JOBCONFKEY, - tableInfo.getJobProperties() - .get(HivePassThroughOutputFormat.HIVE_PASSTHROUGH_STORAGEHANDLER_OF_JOBCONFKEY)); - storagehandlerofhivepassthru = true; - } - } - if (storagehandlerofhivepassthru) { - return ReflectionUtils.newInstance(tableInfo.getOutputFileFormatClass(), jc); - } else { - return tableInfo.getOutputFileFormatClass().newInstance(); - } - } catch (Exception e) { - throw new HiveException(e); + return getHiveOutputFormat(conf, tableDesc.getOutputFileFormatClass()); + } + + public static HiveOutputFormat getHiveOutputFormat(Configuration conf, PartitionDesc partDesc) + throws HiveException { + return getHiveOutputFormat(conf, partDesc.getOutputFileFormatClass()); + } + + private static HiveOutputFormat getHiveOutputFormat( + Configuration conf, Class outputClass) throws HiveException { + OutputFormat outputFormat = ReflectionUtils.newInstance(outputClass, conf); + if (!(outputFormat instanceof HiveOutputFormat)) { + outputFormat = new HivePassThroughOutputFormat(outputFormat); } + return (HiveOutputFormat) outputFormat; } public static RecordUpdater getAcidRecordUpdater(JobConf jc, TableDesc tableInfo, int bucket, @@ -544,4 +510,42 @@ private static String getMatchingPath(Map> pathToAlias private HiveFileFormatUtils() { // prevent instantiation } + + public static class NullOutputCommitter extends OutputCommitter { + @Override + public void setupJob(JobContext jobContext) { } + @Override + public void cleanupJob(JobContext jobContext) { } + + @Override + public void setupTask(TaskAttemptContext taskContext) { } + @Override + public boolean needsTaskCommit(TaskAttemptContext taskContext) { + return false; + } + @Override + public void commitTask(TaskAttemptContext taskContext) { } + @Override + public void abortTask(TaskAttemptContext taskContext) { } + } + + /** + * Hive uses side effect files exclusively for its output. It also manages + * the setup/cleanup/commit of output from the Hive client. As a result it does + * not need support for the same inside the MR framework. + * + * This routine sets the appropriate options related to bypassing setup/cleanup/commit + * support in the MR framework, but does not set the OutputFormat class.
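// A hedged usage sketch (assumed call site; the MergeFileTask hunk later in
// this patch shows the real one): a client-built job installs the no-op
// committer and bypass flags before submission.
import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
import org.apache.hadoop.mapred.JobConf;

class PrepareJobOutputSketch {
  static JobConf newHiveJob() {
    JobConf job = new JobConf();
    HiveFileFormatUtils.prepareJobOutput(job); // NullOutputCommitter + setup/cleanup bypass
    return job;
  }
}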
+ */ + public static void prepareJobOutput(JobConf conf) { + conf.setOutputCommitter(NullOutputCommitter.class); + + // option to bypass job setup and cleanup was introduced in hadoop-21 (MAPREDUCE-463) + // but can be backported. So we disable setup/cleanup in all versions >= 0.19 + conf.setBoolean(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDSETUPCLEANUPNEEDED"), false); + + // option to bypass task cleanup task was introduced in hadoop-23 (MAPREDUCE-2206) + // but can be backported. So we disable setup/cleanup in all versions >= 0.19 + conf.setBoolean(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDTASKCLEANUPNEEDED"), false); + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java index a6a8176..df54cf4 100755 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java @@ -197,18 +197,22 @@ public void configure(JobConf job) { public static InputFormat getInputFormatFromCache( Class inputFormatClass, JobConf job) throws IOException { - - if (!inputFormats.containsKey(inputFormatClass)) { + InputFormat instance = inputFormats.get(inputFormatClass); + if (instance == null) { try { - InputFormat newInstance = (InputFormat) ReflectionUtils + instance = (InputFormat) ReflectionUtils .newInstance(inputFormatClass, job); - inputFormats.put(inputFormatClass, newInstance); + // HBase input formats are not thread safe today. See HIVE-8808. + String inputFormatName = inputFormatClass.getName().toLowerCase(); + if (!inputFormatName.contains("hbase")) { + inputFormats.put(inputFormatClass, instance); + } } catch (Exception e) { throw new IOException("Cannot create an instance of InputFormat class " + inputFormatClass.getName() + " as specified in mapredWork!", e); } } - return inputFormats.get(inputFormatClass); + return instance; } public RecordReader getRecordReader(InputSplit split, JobConf job, diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/HivePassThroughOutputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/HivePassThroughOutputFormat.java index 04eff93..5855288 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/HivePassThroughOutputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/HivePassThroughOutputFormat.java @@ -21,80 +21,35 @@ import java.io.IOException; import java.util.Properties; -import org.apache.hadoop.conf.Configurable; -import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.common.JavaUtils; -import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.io.Writable; -import org.apache.hadoop.io.WritableComparable; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.OutputFormat; import org.apache.hadoop.mapred.RecordWriter; import org.apache.hadoop.util.Progressable; -import org.apache.hadoop.util.ReflectionUtils; /** * This pass through class is used to wrap OutputFormat implementations such that new OutputFormats not derived from * HiveOutputFormat gets through the checker */ +public class HivePassThroughOutputFormat implements HiveOutputFormat{ -public class HivePassThroughOutputFormat implements Configurable, HiveOutputFormat{ + private final OutputFormat actualOutputFormat; - private OutputFormat, ? 
super Writable> actualOutputFormat; - private String actualOutputFormatClass = ""; - private Configuration conf; - private boolean initialized; - public static final String HIVE_PASSTHROUGH_OF_CLASSNAME = - "org.apache.hadoop.hive.ql.io.HivePassThroughOutputFormat"; - - public static final String HIVE_PASSTHROUGH_STORAGEHANDLER_OF_JOBCONFKEY = - "hive.passthrough.storagehandler.of"; - - public HivePassThroughOutputFormat() { - //construct this class through ReflectionUtils from FileSinkOperator - this.actualOutputFormat = null; - this.initialized = false; - } - - private void createActualOF() throws IOException { - Class cls; - try { - int e; - if (actualOutputFormatClass != null) - { - cls = - (Class) Class.forName(actualOutputFormatClass, true, - Utilities.getSessionSpecifiedClassLoader()); - } else { - throw new RuntimeException("Null pointer detected in actualOutputFormatClass"); - } - } catch (ClassNotFoundException e) { - throw new IOException(e); - } - OutputFormat, ? super Writable> actualOF = - ReflectionUtils.newInstance(cls, this.getConf()); - this.actualOutputFormat = actualOF; + public HivePassThroughOutputFormat(OutputFormat outputFormat) { + actualOutputFormat = outputFormat; } @Override public void checkOutputSpecs(FileSystem ignored, JobConf job) throws IOException { - if (this.initialized == false) { - createActualOF(); - this.initialized = true; - } - this.actualOutputFormat.checkOutputSpecs(ignored, job); + actualOutputFormat.checkOutputSpecs(ignored, job); } @Override public org.apache.hadoop.mapred.RecordWriter getRecordWriter(FileSystem ignored, JobConf job, String name, Progressable progress) throws IOException { - if (this.initialized == false) { - createActualOF(); - this.initialized = true; - } - return (RecordWriter) this.actualOutputFormat.getRecordWriter(ignored, + return (RecordWriter) actualOutputFormat.getRecordWriter(ignored, job, name, progress); } @@ -102,31 +57,12 @@ public void checkOutputSpecs(FileSystem ignored, JobConf job) throws IOException public org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter getHiveRecordWriter( JobConf jc, Path finalOutPath, Class valueClass, boolean isCompressed, Properties tableProperties, Progressable progress) throws IOException { - if (this.initialized == false) { - createActualOF(); - } - if (this.actualOutputFormat instanceof HiveOutputFormat) { - return ((HiveOutputFormat) this.actualOutputFormat).getHiveRecordWriter(jc, + if (actualOutputFormat instanceof HiveOutputFormat) { + return ((HiveOutputFormat) actualOutputFormat).getHiveRecordWriter(jc, finalOutPath, valueClass, isCompressed, tableProperties, progress); } - else { - FileSystem fs = finalOutPath.getFileSystem(jc); - HivePassThroughRecordWriter hivepassthroughrecordwriter = new HivePassThroughRecordWriter( - this.actualOutputFormat.getRecordWriter(fs, jc, null, progress)); - return hivepassthroughrecordwriter; - } - } - - @Override - public Configuration getConf() { - return conf; - } - - @Override - public void setConf(Configuration config) { - if (config.get(HIVE_PASSTHROUGH_STORAGEHANDLER_OF_JOBCONFKEY) != null) { - actualOutputFormatClass = config.get(HIVE_PASSTHROUGH_STORAGEHANDLER_OF_JOBCONFKEY); - } - this.conf = config; + FileSystem fs = finalOutPath.getFileSystem(jc); + RecordWriter recordWriter = actualOutputFormat.getRecordWriter(fs, jc, null, progress); + return new HivePassThroughRecordWriter(recordWriter); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeFileTask.java 
b/ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeFileTask.java index 4c2843c..08ab1c9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeFileTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeFileTask.java @@ -31,6 +31,7 @@ import org.apache.hadoop.hive.ql.exec.mr.HadoopJobExecHelper; import org.apache.hadoop.hive.ql.exec.mr.HadoopJobExecHook; import org.apache.hadoop.hive.ql.exec.mr.Throttle; +import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils; import org.apache.hadoop.hive.ql.io.HiveOutputFormatImpl; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.api.StageType; @@ -86,7 +87,7 @@ public int execute(DriverContext driverContext) { ctxCreated = true; } - ShimLoader.getHadoopShims().prepareJobOutput(job); + HiveFileFormatUtils.prepareJobOutput(job); job.setInputFormat(work.getInputformatClass()); job.setOutputFormat(HiveOutputFormatImpl.class); job.setMapperClass(MergeFileMapper.class); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java index a6a0ec1..6be3243 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java @@ -2388,7 +2388,7 @@ static TruthValue evaluatePredicateRange(PredicateLeaf predicate, Object min, // the stats object is converted to text and comparison is performed. // When STRINGs are converted to other base types, NumberFormat exception // can occur in which case TruthValue.YES_NO_NULL value is returned - Object baseObj = predicate.getLiteral(); + Object baseObj = predicate.getLiteral(PredicateLeaf.FileFormat.ORC); Object minValue = getConvertedStatsObj(min, baseObj); Object maxValue = getConvertedStatsObj(max, baseObj); Object predObj = getBaseObjectForComparison(baseObj, minValue); @@ -2432,7 +2432,7 @@ static TruthValue evaluatePredicateRange(PredicateLeaf predicate, Object min, if (minValue.equals(maxValue)) { // for a single value, look through to see if that value is in the // set - for (Object arg : predicate.getLiteralList()) { + for (Object arg : predicate.getLiteralList(PredicateLeaf.FileFormat.ORC)) { predObj = getBaseObjectForComparison(arg, minValue); loc = compareToRange((Comparable) predObj, minValue, maxValue); if (loc == Location.MIN) { @@ -2442,7 +2442,7 @@ static TruthValue evaluatePredicateRange(PredicateLeaf predicate, Object min, return TruthValue.NO_NULL; } else { // are all of the values outside of the range? 
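// Aside (grounded in the hunks above): the SearchArgument literal accessors are
// now format-qualified, so ORC callers read literals as, e.g.:
//   Object lit = predicate.getLiteral(PredicateLeaf.FileFormat.ORC);
//   List lits = predicate.getLiteralList(PredicateLeaf.FileFormat.ORC);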
- for (Object arg : predicate.getLiteralList()) { + for (Object arg : predicate.getLiteralList(PredicateLeaf.FileFormat.ORC)) { predObj = getBaseObjectForComparison(arg, minValue); loc = compareToRange((Comparable) predObj, minValue, maxValue); if (loc == Location.MIN || loc == Location.MIDDLE || @@ -2453,7 +2453,7 @@ static TruthValue evaluatePredicateRange(PredicateLeaf predicate, Object min, return TruthValue.NO_NULL; } case BETWEEN: - List args = predicate.getLiteralList(); + List args = predicate.getLiteralList(PredicateLeaf.FileFormat.ORC); Object predObj1 = getBaseObjectForComparison(args.get(0), minValue); loc = compareToRange((Comparable) predObj1, minValue, maxValue); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/FilterPredicateLeafBuilder.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/FilterPredicateLeafBuilder.java new file mode 100644 index 0000000..2797654 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/FilterPredicateLeafBuilder.java @@ -0,0 +1,80 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.io.parquet; + +import java.util.List; + +import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf; +import parquet.filter2.predicate.FilterApi; +import parquet.filter2.predicate.FilterPredicate; + +import static parquet.filter2.predicate.FilterApi.not; +import static parquet.filter2.predicate.FilterApi.or; + +/** + * The base class for building Parquet-supported filter predicates for primitive types.
+ */ +public abstract class FilterPredicateLeafBuilder { + + /** + * Build filter predicate with multiple constants + * + * @param op IN or BETWEEN + * @param literals + * @param columnName + * @return + */ + public FilterPredicate buildPredicate(PredicateLeaf.Operator op, List literals, + String columnName) throws Exception { + FilterPredicate result = null; + switch (op) { + case IN: + for (Object literal : literals) { + if (result == null) { + result = buildPredict(PredicateLeaf.Operator.EQUALS, literal, columnName); + } else { + result = or(result, buildPredict(PredicateLeaf.Operator.EQUALS, literal, + columnName)); + } + } + return result; + case BETWEEN: + if (literals.size() != 2) { + throw new RuntimeException( + "Not able to build 'between' operation filter with " + literals + + " which needs two literals"); + } + Object min = literals.get(0); + Object max = literals.get(1); + FilterPredicate lt = not(buildPredict(PredicateLeaf.Operator.LESS_THAN_EQUALS, + min, columnName)); + FilterPredicate gt = buildPredict(PredicateLeaf.Operator.LESS_THAN, max, columnName); + result = FilterApi.and(gt, lt); + return result; + default: + throw new RuntimeException("Unknown PredicateLeaf Operator type: " + op); + } + } + + /** + * Build predicate with a single constant + * + * @param op EQUALS, NULL_SAFE_EQUALS, LESS_THAN, LESS_THAN_EQUALS, IS_NULL + * @param constant + * @param columnName + * @return null or a FilterPredicate, null means no filter will be executed + */ + public abstract FilterPredicate buildPredict(PredicateLeaf.Operator op, Object constant, + String columnName) throws Exception; +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/LeafFilterFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/LeafFilterFactory.java new file mode 100644 index 0000000..83865e8 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/LeafFilterFactory.java @@ -0,0 +1,169 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
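// A hedged usage sketch (hypothetical values) of the builder API above, using
// the LeafFilterFactory added later in this patch; an IN leaf becomes EQUALS
// leaves OR-ed together by buildPredicate.
import java.util.Arrays;
import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
import parquet.filter2.predicate.FilterPredicate;

class LeafBuilderUsageSketch {
  static FilterPredicate inFilter() throws Exception {
    FilterPredicateLeafBuilder builder = new LeafFilterFactory()
        .getLeafFilterBuilderByType(PredicateLeaf.Type.INTEGER);
    // IN (1, 2, 3) on column "id"
    return builder.buildPredicate(PredicateLeaf.Operator.IN,
        Arrays.<Object>asList(1, 2, 3), "id");
  }
}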
+ */ +package org.apache.hadoop.hive.ql.io.parquet; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf; +import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf.Operator; + +import parquet.filter2.predicate.FilterApi; +import parquet.filter2.predicate.FilterPredicate; +import parquet.io.api.Binary; + +import static parquet.filter2.predicate.FilterApi.eq; +import static parquet.filter2.predicate.FilterApi.lt; +import static parquet.filter2.predicate.FilterApi.ltEq; +import static parquet.filter2.predicate.FilterApi.binaryColumn; +import static parquet.filter2.predicate.FilterApi.booleanColumn; +import static parquet.filter2.predicate.FilterApi.doubleColumn; +import static parquet.filter2.predicate.FilterApi.intColumn; + +public class LeafFilterFactory { + private static final Log LOG = LogFactory.getLog(LeafFilterFactory.class); + + class IntFilterPredicateLeafBuilder extends FilterPredicateLeafBuilder { + /** + * @param op consists of EQUALS, NULL_SAFE_EQUALS, LESS_THAN, LESS_THAN_EQUALS, IS_NULL + * @param literal + * @param columnName + * @return + */ + @Override + public FilterPredicate buildPredict(Operator op, Object literal, + String columnName) { + switch (op) { + case LESS_THAN: + return lt(intColumn(columnName), ((Number) literal).intValue()); + case IS_NULL: + case EQUALS: + case NULL_SAFE_EQUALS: + return eq(intColumn(columnName), + (literal == null) ? null : ((Number) literal).intValue()); + case LESS_THAN_EQUALS: + return ltEq(intColumn(columnName), ((Number) literal).intValue()); + default: + throw new RuntimeException("Unknown PredicateLeaf Operator type: " + op); + } + } + } + + class LongFilterPredicateLeafBuilder extends FilterPredicateLeafBuilder { + @Override + public FilterPredicate buildPredict(Operator op, Object constant, + String columnName) { + switch (op) { + case LESS_THAN: + return lt(FilterApi.longColumn(columnName), ((Number) constant).longValue()); + case IS_NULL: + case EQUALS: + case NULL_SAFE_EQUALS: + return eq(FilterApi.longColumn(columnName), + (constant == null) ? null : ((Number) constant).longValue()); + case LESS_THAN_EQUALS: + return ltEq(FilterApi.longColumn(columnName), + ((Number) constant).longValue()); + default: + throw new RuntimeException("Unknown PredicateLeaf Operator type: " + op); + } + } + } + + class DoubleFilterPredicateLeafBuilder extends FilterPredicateLeafBuilder { + + @Override + public FilterPredicate buildPredict(Operator op, Object constant, + String columnName) { + switch (op) { + case LESS_THAN: + return lt(doubleColumn(columnName), ((Number) constant).doubleValue()); + case IS_NULL: + case EQUALS: + case NULL_SAFE_EQUALS: + return eq(doubleColumn(columnName), + (constant == null) ? null : ((Number) constant).doubleValue()); + case LESS_THAN_EQUALS: + return ltEq(FilterApi.doubleColumn(columnName), + ((Number) constant).doubleValue()); + default: + throw new RuntimeException("Unknown PredicateLeaf Operator type: " + op); + } + } + } + + class BooleanFilterPredicateLeafBuilder extends FilterPredicateLeafBuilder { + @Override + public FilterPredicate buildPredict(Operator op, Object constant, + String columnName) throws Exception{ + switch (op) { + case IS_NULL: + case EQUALS: + case NULL_SAFE_EQUALS: + return eq(booleanColumn(columnName), + (constant == null) ? 
null : ((Boolean) constant).booleanValue()); + default: + throw new RuntimeException("Unknown PredicateLeaf Operator type: " + op); + } + } + } + + class BinaryFilterPredicateLeafBuilder extends FilterPredicateLeafBuilder { + @Override + public FilterPredicate buildPredict(Operator op, Object constant, + String columnName) throws Exception{ + switch (op) { + case LESS_THAN: + return lt(binaryColumn(columnName), Binary.fromString((String) constant)); + case IS_NULL: + case EQUALS: + case NULL_SAFE_EQUALS: + return eq(binaryColumn(columnName), + (constant == null) ? null : Binary.fromString((String) constant)); + case LESS_THAN_EQUALS: + return ltEq(binaryColumn(columnName), Binary.fromString((String) constant)); + default: + // should never be executed + throw new RuntimeException("Unknown PredicateLeaf Operator type: " + op); + } + } + } + + /** + * Get the leaf filter builder for a PredicateLeaf.Type; date, decimal and timestamp are not + * supported yet. + * @param type the PredicateLeaf.Type of the leaf + * @return the matching builder, or null if the type is unsupported + */ + public FilterPredicateLeafBuilder getLeafFilterBuilderByType(PredicateLeaf.Type type){ + switch (type){ + case INTEGER: + return new IntFilterPredicateLeafBuilder(); + case LONG: + return new LongFilterPredicateLeafBuilder(); + case FLOAT: // float and double + return new DoubleFilterPredicateLeafBuilder(); + case STRING: // string, char, varchar + return new BinaryFilterPredicateLeafBuilder(); + case BOOLEAN: + return new BooleanFilterPredicateLeafBuilder(); + case DATE: + case DECIMAL: + case TIMESTAMP: + default: + LOG.debug("Conversion to Parquet FilterPredicate not supported for " + type); + return null; + } + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ArrayWritableGroupConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ArrayWritableGroupConverter.java deleted file mode 100644 index 582a5df..0000000 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ArrayWritableGroupConverter.java +++ /dev/null @@ -1,85 +0,0 @@ -/** - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License.
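// Hedged aside: the null return above matters to callers — pushdown code must
// treat unsupported leaf types as "no filter", roughly:
//   FilterPredicateLeafBuilder builder =
//       new LeafFilterFactory().getLeafFilterBuilderByType(leaf.getType());
//   if (builder == null) {
//     return null; // skip pushdown for DATE/DECIMAL/TIMESTAMP leaves
//   }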
- */ -package org.apache.hadoop.hive.ql.io.parquet.convert; - -import org.apache.hadoop.io.ArrayWritable; -import org.apache.hadoop.io.Writable; - -import parquet.io.ParquetDecodingException; -import parquet.io.api.Converter; -import parquet.schema.GroupType; - -public class ArrayWritableGroupConverter extends HiveGroupConverter { - - private final Converter[] converters; - private final HiveGroupConverter parent; - private final int index; - private final boolean isMap; - private Writable currentValue; - private Writable[] mapPairContainer; - - public ArrayWritableGroupConverter(final GroupType groupType, final HiveGroupConverter parent, - final int index) { - this.parent = parent; - this.index = index; - int count = groupType.getFieldCount(); - if (count < 1 || count > 2) { - throw new IllegalStateException("Field count must be either 1 or 2: " + count); - } - isMap = count == 2; - converters = new Converter[count]; - for (int i = 0; i < count; i++) { - converters[i] = getConverterFromDescription(groupType.getType(i), i, this); - } - } - - @Override - public Converter getConverter(final int fieldIndex) { - return converters[fieldIndex]; - } - - @Override - public void start() { - if (isMap) { - mapPairContainer = new Writable[2]; - } - } - - @Override - public void end() { - if (isMap) { - currentValue = new ArrayWritable(Writable.class, mapPairContainer); - } - parent.add(index, currentValue); - } - - @Override - protected void set(final int index, final Writable value) { - if (index != 0 && mapPairContainer == null || index > 1) { - throw new ParquetDecodingException("Repeated group can only have one or two fields for maps." + - " Not allowed to set for the index : " + index); - } - - if (isMap) { - mapPairContainer[index] = value; - } else { - currentValue = value; - } - } - - @Override - protected void add(final int index, final Writable value) { - set(index, value); - } -} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ConverterParent.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ConverterParent.java new file mode 100644 index 0000000..a86d6f4 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ConverterParent.java @@ -0,0 +1,7 @@ +package org.apache.hadoop.hive.ql.io.parquet.convert; + +import org.apache.hadoop.io.Writable; + +interface ConverterParent { + void set(int index, Writable value); +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableGroupConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableGroupConverter.java deleted file mode 100644 index 0e310fb..0000000 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableGroupConverter.java +++ /dev/null @@ -1,140 +0,0 @@ -/** - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
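// A minimal sketch (hypothetical class, same package assumed) of the new
// ConverterParent contract above: a parent exposes set(index, value) so child
// converters can deposit finished Writables into the slot they were built with.
import org.apache.hadoop.io.Writable;

class CollectingParentSketch implements ConverterParent {
  private final Writable[] slots = new Writable[2];

  @Override
  public void set(int index, Writable value) {
    slots[index] = value; // a child converter at position `index` reports here
  }
}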
- */ -package org.apache.hadoop.hive.ql.io.parquet.convert; - -import java.util.ArrayList; -import java.util.List; - -import org.apache.hadoop.io.ArrayWritable; -import org.apache.hadoop.io.Writable; - -import parquet.io.api.Converter; -import parquet.schema.GroupType; -import parquet.schema.Type; - -/** - * - * A MapWritableGroupConverter, real converter between hive and parquet types recursively for complex types. - * - */ -public class DataWritableGroupConverter extends HiveGroupConverter { - - private final Converter[] converters; - private final HiveGroupConverter parent; - private final int index; - private final Object[] currentArr; - private Writable[] rootMap; - - public DataWritableGroupConverter(final GroupType requestedSchema, final GroupType tableSchema) { - this(requestedSchema, null, 0, tableSchema); - final int fieldCount = tableSchema.getFieldCount(); - this.rootMap = new Writable[fieldCount]; - } - - public DataWritableGroupConverter(final GroupType groupType, final HiveGroupConverter parent, - final int index) { - this(groupType, parent, index, groupType); - } - - public DataWritableGroupConverter(final GroupType selectedGroupType, - final HiveGroupConverter parent, final int index, final GroupType containingGroupType) { - this.parent = parent; - this.index = index; - final int totalFieldCount = containingGroupType.getFieldCount(); - final int selectedFieldCount = selectedGroupType.getFieldCount(); - - currentArr = new Object[totalFieldCount]; - converters = new Converter[selectedFieldCount]; - - List selectedFields = selectedGroupType.getFields(); - for (int i = 0; i < selectedFieldCount; i++) { - Type subtype = selectedFields.get(i); - if (containingGroupType.getFields().contains(subtype)) { - converters[i] = getConverterFromDescription(subtype, - containingGroupType.getFieldIndex(subtype.getName()), this); - } else { - throw new IllegalStateException("Group type [" + containingGroupType + - "] does not contain requested field: " + subtype); - } - } - } - - public final ArrayWritable getCurrentArray() { - final Writable[] writableArr; - if (this.rootMap != null) { // We're at the root : we can safely re-use the same map to save perf - writableArr = this.rootMap; - } else { - writableArr = new Writable[currentArr.length]; - } - - for (int i = 0; i < currentArr.length; i++) { - final Object obj = currentArr[i]; - if (obj instanceof List) { - final List objList = (List)obj; - final ArrayWritable arr = new ArrayWritable(Writable.class, - objList.toArray(new Writable[objList.size()])); - writableArr[i] = arr; - } else { - writableArr[i] = (Writable) obj; - } - } - return new ArrayWritable(Writable.class, writableArr); - } - - @Override - final protected void set(final int index, final Writable value) { - currentArr[index] = value; - } - - @Override - public Converter getConverter(final int fieldIndex) { - return converters[fieldIndex]; - } - - @Override - public void start() { - for (int i = 0; i < currentArr.length; i++) { - currentArr[i] = null; - } - } - - @Override - public void end() { - if (parent != null) { - parent.set(index, getCurrentArray()); - } - } - - @Override - protected void add(final int index, final Writable value) { - if (currentArr[index] != null) { - final Object obj = currentArr[index]; - if (obj instanceof List) { - final List list = (List) obj; - list.add(value); - } else { - throw new IllegalStateException("This should be a List: " + obj); - } - } else { - // create a list here because we don't know the final length of the object - // and it is 
more flexible than ArrayWritable. - // - // converted to ArrayWritable by getCurrentArray(). - final List buffer = new ArrayList(); - buffer.add(value); - currentArr[index] = (Object) buffer; - } - - } -} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableRecordConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableRecordConverter.java index 5a46136..000e8ea 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableRecordConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableRecordConverter.java @@ -13,11 +13,7 @@ */ package org.apache.hadoop.hive.ql.io.parquet.convert; -import java.util.List; - -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.io.ArrayWritable; - import parquet.io.api.GroupConverter; import parquet.io.api.RecordMaterializer; import parquet.schema.GroupType; @@ -29,10 +25,10 @@ */ public class DataWritableRecordConverter extends RecordMaterializer { - private final DataWritableGroupConverter root; + private final HiveStructConverter root; public DataWritableRecordConverter(final GroupType requestedSchema, final GroupType tableSchema) { - this.root = new DataWritableGroupConverter(requestedSchema, tableSchema); + this.root = new HiveStructConverter(requestedSchema, tableSchema); } @Override @@ -44,4 +40,4 @@ public ArrayWritable getCurrentRecord() { public GroupConverter getRootConverter() { return root; } -} \ No newline at end of file +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java index bce6400..23bb364 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java @@ -47,7 +47,7 @@ EDOUBLE_CONVERTER(Double.TYPE) { @Override - Converter getConverter(final PrimitiveType type, final int index, final HiveGroupConverter parent) { + PrimitiveConverter getConverter(final PrimitiveType type, final int index, final ConverterParent parent) { return new PrimitiveConverter() { @Override public void addDouble(final double value) { @@ -58,7 +58,7 @@ public void addDouble(final double value) { }, EBOOLEAN_CONVERTER(Boolean.TYPE) { @Override - Converter getConverter(final PrimitiveType type, final int index, final HiveGroupConverter parent) { + PrimitiveConverter getConverter(final PrimitiveType type, final int index, final ConverterParent parent) { return new PrimitiveConverter() { @Override public void addBoolean(final boolean value) { @@ -69,7 +69,7 @@ public void addBoolean(final boolean value) { }, EFLOAT_CONVERTER(Float.TYPE) { @Override - Converter getConverter(final PrimitiveType type, final int index, final HiveGroupConverter parent) { + PrimitiveConverter getConverter(final PrimitiveType type, final int index, final ConverterParent parent) { return new PrimitiveConverter() { @Override public void addFloat(final float value) { @@ -80,7 +80,7 @@ public void addFloat(final float value) { }, EINT32_CONVERTER(Integer.TYPE) { @Override - Converter getConverter(final PrimitiveType type, final int index, final HiveGroupConverter parent) { + PrimitiveConverter getConverter(final PrimitiveType type, final int index, final ConverterParent parent) { return new PrimitiveConverter() { @Override public void addInt(final int value) { @@ -91,7 +91,7 @@ public void addInt(final int value) { }, 
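// Hedged aside: each ETypeConverter constant follows the same shape — an
// anonymous PrimitiveConverter that boxes the primitive and deposits it in the
// parent's field slot (method bodies are elided by the hunk context), roughly:
//   return new PrimitiveConverter() {
//     @Override
//     public void addInt(final int value) {
//       parent.set(index, new IntWritable(value));
//     }
//   };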
EINT64_CONVERTER(Long.TYPE) { @Override - Converter getConverter(final PrimitiveType type, final int index, final HiveGroupConverter parent) { + PrimitiveConverter getConverter(final PrimitiveType type, final int index, final ConverterParent parent) { return new PrimitiveConverter() { @Override public void addLong(final long value) { @@ -102,7 +102,7 @@ public void addLong(final long value) { }, EBINARY_CONVERTER(Binary.class) { @Override - Converter getConverter(final PrimitiveType type, final int index, final HiveGroupConverter parent) { + PrimitiveConverter getConverter(final PrimitiveType type, final int index, final ConverterParent parent) { return new BinaryConverter(type, parent, index) { @Override protected BytesWritable convert(Binary binary) { @@ -113,7 +113,7 @@ protected BytesWritable convert(Binary binary) { }, ESTRING_CONVERTER(String.class) { @Override - Converter getConverter(final PrimitiveType type, final int index, final HiveGroupConverter parent) { + PrimitiveConverter getConverter(final PrimitiveType type, final int index, final ConverterParent parent) { return new BinaryConverter(type, parent, index) { @Override protected Text convert(Binary binary) { @@ -124,7 +124,7 @@ protected Text convert(Binary binary) { }, EDECIMAL_CONVERTER(BigDecimal.class) { @Override - Converter getConverter(final PrimitiveType type, final int index, final HiveGroupConverter parent) { + PrimitiveConverter getConverter(final PrimitiveType type, final int index, final ConverterParent parent) { return new BinaryConverter(type, parent, index) { @Override protected HiveDecimalWritable convert(Binary binary) { @@ -135,7 +135,7 @@ protected HiveDecimalWritable convert(Binary binary) { }, ETIMESTAMP_CONVERTER(TimestampWritable.class) { @Override - Converter getConverter(final PrimitiveType type, final int index, final HiveGroupConverter parent) { + PrimitiveConverter getConverter(final PrimitiveType type, final int index, final ConverterParent parent) { return new BinaryConverter(type, parent, index) { @Override protected TimestampWritable convert(Binary binary) { @@ -157,10 +157,10 @@ private ETypeConverter(final Class type) { return _type; } - abstract Converter getConverter(final PrimitiveType type, final int index, final HiveGroupConverter parent); + abstract PrimitiveConverter getConverter(final PrimitiveType type, final int index, final ConverterParent parent); - public static Converter getNewConverter(final PrimitiveType type, final int index, - final HiveGroupConverter parent) { + public static PrimitiveConverter getNewConverter(final PrimitiveType type, final int index, + final ConverterParent parent) { if (type.isPrimitive() && (type.asPrimitiveType().getPrimitiveTypeName().equals(PrimitiveType.PrimitiveTypeName.INT96))) { //TODO- cleanup once parquet support Timestamp type annotation. 
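// Hedged aside: INT96 carries no original-type annotation yet, so it is
// special-cased to the timestamp converter before the class-based lookup:
//   PrimitiveConverter c = ETypeConverter.getNewConverter(
//       type.asPrimitiveType(), index, parent); // INT96 -> ETIMESTAMP_CONVERTER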
return ETypeConverter.ETIMESTAMP_CONVERTER.getConverter(type, index, parent); @@ -183,11 +183,11 @@ public static Converter getNewConverter(final PrimitiveType type, final int inde public abstract static class BinaryConverter extends PrimitiveConverter { protected final PrimitiveType type; - private final HiveGroupConverter parent; + private final ConverterParent parent; private final int index; private ArrayList lookupTable; - public BinaryConverter(PrimitiveType type, HiveGroupConverter parent, int index) { + public BinaryConverter(PrimitiveType type, ConverterParent parent, int index) { this.type = type; this.parent = parent; this.index = index; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveCollectionConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveCollectionConverter.java new file mode 100644 index 0000000..872900b --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveCollectionConverter.java @@ -0,0 +1,164 @@ +package org.apache.hadoop.hive.ql.io.parquet.convert; + +import com.google.common.base.Preconditions; +import java.util.ArrayList; +import java.util.List; +import org.apache.hadoop.io.ArrayWritable; +import org.apache.hadoop.io.Writable; +import parquet.io.api.Converter; +import parquet.schema.GroupType; +import parquet.schema.Type; + +public class HiveCollectionConverter extends HiveGroupConverter { + private final GroupType collectionType; + private final ConverterParent parent; + private final int index; + private final Converter innerConverter; + private final List list = new ArrayList(); + + public static HiveGroupConverter forMap(GroupType mapType, + ConverterParent parent, + int index) { + return new HiveCollectionConverter( + mapType, parent, index, true /* its a map */ ); + } + + public static HiveGroupConverter forList(GroupType listType, + ConverterParent parent, + int index) { + return new HiveCollectionConverter( + listType, parent, index, false /* not a map */ ); + } + + private HiveCollectionConverter(GroupType collectionType, + ConverterParent parent, + int index, boolean isMap) { + this.collectionType = collectionType; + this.parent = parent; + this.index = index; + Type repeatedType = collectionType.getType(0); + if (isMap) { + this.innerConverter = new KeyValueConverter( + repeatedType.asGroupType(), this); + } else if (isElementType(repeatedType, collectionType.getName())) { + this.innerConverter = getConverterFromDescription(repeatedType, 0, this); + } else { + this.innerConverter = new ElementConverter( + repeatedType.asGroupType(), this); + } + } + + @Override + public Converter getConverter(int fieldIndex) { + Preconditions.checkArgument( + fieldIndex == 0, "Invalid field index: " + fieldIndex); + return innerConverter; + } + + @Override + public void start() { + list.clear(); + } + + @Override + public void end() { + parent.set(index, wrapList(new ArrayWritable( + Writable.class, list.toArray(new Writable[list.size()])))); + } + + @Override + public void set(int index, Writable value) { + list.add(value); + } + + private static class KeyValueConverter extends HiveGroupConverter { + private final HiveGroupConverter parent; + private final Converter keyConverter; + private final Converter valueConverter; + private Writable[] keyValue = null; + + public KeyValueConverter(GroupType keyValueType, HiveGroupConverter parent) { + this.parent = parent; + this.keyConverter = getConverterFromDescription( + keyValueType.getType(0), 0, this); + this.valueConverter = 
getConverterFromDescription( + keyValueType.getType(1), 1, this); + } + + @Override + public void set(int fieldIndex, Writable value) { + keyValue[fieldIndex] = value; + } + + @Override + public Converter getConverter(int fieldIndex) { + switch (fieldIndex) { + case 0: + return keyConverter; + case 1: + return valueConverter; + default: + throw new IllegalArgumentException( + "Invalid field index for map key-value: " + fieldIndex); + } + } + + @Override + public void start() { + this.keyValue = new Writable[2]; + } + + @Override + public void end() { + parent.set(0, new ArrayWritable(Writable.class, keyValue)); + } + } + + private static class ElementConverter extends HiveGroupConverter { + private final HiveGroupConverter parent; + private final Converter elementConverter; + private Writable element = null; + + public ElementConverter(GroupType repeatedType, HiveGroupConverter parent) { + this.parent = parent; + this.elementConverter = getConverterFromDescription( + repeatedType.getType(0), 0, this); + } + + @Override + public void set(int index, Writable value) { + this.element = value; + } + + @Override + public Converter getConverter(int i) { + return elementConverter; + } + + @Override + public void start() { + this.element = null; + } + + @Override + public void end() { + parent.set(0, element); + } + } + + private static boolean isElementType(Type repeatedType, String parentName) { + if (repeatedType.isPrimitive() || + (repeatedType.asGroupType().getFieldCount() != 1)) { + return true; + } else if (repeatedType.getName().equals("array")) { + return true; // existing avro data + } else if (repeatedType.getName().equals(parentName + "_tuple")) { + return true; // existing thrift data + } + // false for the following cases: + // * name is "list", which matches the spec + // * name is "bag", which indicates existing hive or pig data + // * ambiguous case, which should be assumed is 3-level according to spec + return false; + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveGroupConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveGroupConverter.java index 78bdf62..11772be 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveGroupConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveGroupConverter.java @@ -13,33 +13,62 @@ */ package org.apache.hadoop.hive.ql.io.parquet.convert; +import org.apache.hadoop.io.ArrayWritable; import org.apache.hadoop.io.Writable; - import parquet.io.api.Converter; import parquet.io.api.GroupConverter; +import parquet.io.api.PrimitiveConverter; +import parquet.schema.GroupType; +import parquet.schema.OriginalType; +import parquet.schema.PrimitiveType; import parquet.schema.Type; -import parquet.schema.Type.Repetition; -public abstract class HiveGroupConverter extends GroupConverter { +public abstract class HiveGroupConverter extends GroupConverter implements ConverterParent { + + protected static PrimitiveConverter getConverterFromDescription(PrimitiveType type, int index, ConverterParent parent) { + if (type == null) { + return null; + } + + return ETypeConverter.getNewConverter(type, index, parent); + } + + protected static HiveGroupConverter getConverterFromDescription(GroupType type, int index, ConverterParent parent) { + if (type == null) { + return null; + } + + OriginalType annotation = type.getOriginalType(); + if (annotation == OriginalType.LIST) { + return HiveCollectionConverter.forList(type, parent, index); + } else if (annotation == 
OriginalType.MAP) { + return HiveCollectionConverter.forMap(type, parent, index); + } - protected static Converter getConverterFromDescription(final Type type, final int index, - final HiveGroupConverter parent) { + return new HiveStructConverter(type, parent, index); + } + + protected static Converter getConverterFromDescription(Type type, int index, ConverterParent parent) { if (type == null) { return null; } + if (type.isPrimitive()) { - return ETypeConverter.getNewConverter(type.asPrimitiveType(), index, parent); - } else { - if (type.asGroupType().getRepetition() == Repetition.REPEATED) { - return new ArrayWritableGroupConverter(type.asGroupType(), parent, index); - } else { - return new DataWritableGroupConverter(type.asGroupType(), parent, index); - } + return getConverterFromDescription(type.asPrimitiveType(), index, parent); } + + return getConverterFromDescription(type.asGroupType(), index, parent); } - protected abstract void set(int index, Writable value); + /** + * The original list and map conversion didn't remove the synthetic layer and + * the ObjectInspector had to remove it. This is a temporary fix that adds an + * extra layer for the ObjectInspector to remove. + */ + static ArrayWritable wrapList(ArrayWritable list) { + return new ArrayWritable(Writable.class, new Writable[] {list}); + } - protected abstract void add(int index, Writable value); + public abstract void set(int index, Writable value); -} \ No newline at end of file +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveStructConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveStructConverter.java new file mode 100644 index 0000000..eeb3838 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveStructConverter.java @@ -0,0 +1,131 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.io.parquet.convert; + +import java.util.ArrayList; +import java.util.List; +import org.apache.hadoop.io.ArrayWritable; +import org.apache.hadoop.io.Writable; +import parquet.io.api.Converter; +import parquet.schema.GroupType; +import parquet.schema.Type; + +/** + * + * A HiveStructConverter, the real converter between Hive and Parquet types, applied recursively for complex types.
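// Hedged aside: the struct converter materializes each Parquet group as an
// ArrayWritable whose slots line up with the *table* schema, so a projection
// selecting 2 of 5 columns still fills a 5-slot array (unselected slots stay
// null); getCurrentArray() below returns exactly that array.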
+ * + */ +public class HiveStructConverter extends HiveGroupConverter { + + private final int totalFieldCount; + private final Converter[] converters; + private final ConverterParent parent; + private final int index; + private Writable[] writables; + private final List repeatedConverters; + private boolean reuseWritableArray = false; + + public HiveStructConverter(final GroupType requestedSchema, final GroupType tableSchema) { + this(requestedSchema, null, 0, tableSchema); + this.reuseWritableArray = true; + this.writables = new Writable[tableSchema.getFieldCount()]; + } + + public HiveStructConverter(final GroupType groupType, final ConverterParent parent, + final int index) { + this(groupType, parent, index, groupType); + } + + public HiveStructConverter(final GroupType selectedGroupType, + final ConverterParent parent, final int index, final GroupType containingGroupType) { + this.parent = parent; + this.index = index; + this.totalFieldCount = containingGroupType.getFieldCount(); + final int selectedFieldCount = selectedGroupType.getFieldCount(); + + converters = new Converter[selectedFieldCount]; + this.repeatedConverters = new ArrayList(); + + List selectedFields = selectedGroupType.getFields(); + for (int i = 0; i < selectedFieldCount; i++) { + Type subtype = selectedFields.get(i); + if (containingGroupType.getFields().contains(subtype)) { + int fieldIndex = containingGroupType.getFieldIndex(subtype.getName()); + converters[i] = getFieldConverter(subtype, fieldIndex); + } else { + throw new IllegalStateException("Group type [" + containingGroupType + + "] does not contain requested field: " + subtype); + } + } + } + + private Converter getFieldConverter(Type type, int fieldIndex) { + Converter converter; + if (type.isRepetition(Type.Repetition.REPEATED)) { + if (type.isPrimitive()) { + converter = new Repeated.RepeatedPrimitiveConverter( + type.asPrimitiveType(), this, fieldIndex); + } else { + converter = new Repeated.RepeatedGroupConverter( + type.asGroupType(), this, fieldIndex); + } + + repeatedConverters.add((Repeated) converter); + } else { + converter = getConverterFromDescription(type, fieldIndex, this); + } + + return converter; + } + + public final ArrayWritable getCurrentArray() { + return new ArrayWritable(Writable.class, writables); + } + + @Override + public void set(int fieldIndex, Writable value) { + writables[fieldIndex] = value; + } + + @Override + public Converter getConverter(final int fieldIndex) { + return converters[fieldIndex]; + } + + @Override + public void start() { + if (reuseWritableArray) { + // reset the array to null values + for (int i = 0; i < writables.length; i += 1) { + writables[i] = null; + } + } else { + this.writables = new Writable[totalFieldCount]; + } + for (Repeated repeated : repeatedConverters) { + repeated.parentStart(); + } + } + + @Override + public void end() { + for (Repeated repeated : repeatedConverters) { + repeated.parentEnd(); + } + if (parent != null) { + parent.set(index, getCurrentArray()); + } + } + +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/Repeated.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/Repeated.java new file mode 100644 index 0000000..af28b4c --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/Repeated.java @@ -0,0 +1,155 @@ +package org.apache.hadoop.hive.ql.io.parquet.convert; + +import java.util.ArrayList; +import java.util.List; +import org.apache.hadoop.io.ArrayWritable; +import org.apache.hadoop.io.Writable; +import 
parquet.column.Dictionary; +import parquet.io.api.Binary; +import parquet.io.api.Converter; +import parquet.io.api.PrimitiveConverter; +import parquet.schema.GroupType; +import parquet.schema.PrimitiveType; + +/** + * Converters for repeated fields need to know when the parent field starts and + * ends to correctly build lists from the repeated values. + */ +public interface Repeated extends ConverterParent { + + public void parentStart(); + + public void parentEnd(); + + /** + * Stands in for a PrimitiveConverter and accumulates multiple values as an + * ArrayWritable. + */ + class RepeatedPrimitiveConverter extends PrimitiveConverter implements Repeated { + private final PrimitiveType primitiveType; + private final PrimitiveConverter wrapped; + private final ConverterParent parent; + private final int index; + private final List list = new ArrayList(); + + public RepeatedPrimitiveConverter(PrimitiveType primitiveType, ConverterParent parent, int index) { + this.primitiveType = primitiveType; + this.parent = parent; + this.index = index; + this.wrapped = HiveGroupConverter.getConverterFromDescription(primitiveType, 0, this); + } + + @Override + public boolean hasDictionarySupport() { + return wrapped.hasDictionarySupport(); + } + + @Override + public void setDictionary(Dictionary dictionary) { + wrapped.setDictionary(dictionary); + } + + @Override + public void addValueFromDictionary(int dictionaryId) { + wrapped.addValueFromDictionary(dictionaryId); + } + + @Override + public void addBinary(Binary value) { + wrapped.addBinary(value); + } + + @Override + public void addBoolean(boolean value) { + wrapped.addBoolean(value); + } + + @Override + public void addDouble(double value) { + wrapped.addDouble(value); + } + + @Override + public void addFloat(float value) { + wrapped.addFloat(value); + } + + @Override + public void addInt(int value) { + wrapped.addInt(value); + } + + @Override + public void addLong(long value) { + wrapped.addLong(value); + } + + @Override + public void parentStart() { + list.clear(); + } + + @Override + public void parentEnd() { + parent.set(index, HiveGroupConverter.wrapList(new ArrayWritable( + Writable.class, list.toArray(new Writable[list.size()])))); + } + + @Override + public void set(int index, Writable value) { + list.add(value); + } + } + + /** + * Stands in for a HiveGroupConverter and accumulates multiple values as an + * ArrayWritable. 
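+ * While the parent field is active, every set() call coming back from the
+ * wrapped converter appends one element to the accumulated list.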
+ */ + class RepeatedGroupConverter extends HiveGroupConverter + implements Repeated { + private final GroupType groupType; + private final HiveGroupConverter wrapped; + private final ConverterParent parent; + private final int index; + private final List list = new ArrayList(); + + public RepeatedGroupConverter(GroupType groupType, ConverterParent parent, int index) { + this.groupType = groupType; + this.parent = parent; + this.index = index; + this.wrapped = HiveGroupConverter.getConverterFromDescription(groupType, 0, this); + } + + @Override + public void set(int fieldIndex, Writable value) { + list.add(value); + } + + @Override + public Converter getConverter(int fieldIndex) { + // delegate to the group's converters + return wrapped.getConverter(fieldIndex); + } + + @Override + public void start() { + wrapped.start(); + } + + @Override + public void end() { + wrapped.end(); + } + + @Override + public void parentStart() { + list.clear(); + } + + @Override + public void parentEnd() { + parent.set(index, wrapList(new ArrayWritable( + Writable.class, list.toArray(new Writable[list.size()])))); + } + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java index f5da46d..4e4d7fd 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java @@ -20,8 +20,12 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.io.IOConstants; import org.apache.hadoop.hive.ql.io.parquet.ProjectionPusher; +import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory; +import org.apache.hadoop.hive.ql.plan.TableScanDesc; +import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; import org.apache.hadoop.io.ArrayWritable; import org.apache.hadoop.io.Writable; import org.apache.hadoop.mapred.FileSplit; @@ -32,6 +36,7 @@ import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.hadoop.mapreduce.TaskAttemptID; +import parquet.filter2.predicate.FilterPredicate; import parquet.hadoop.ParquetFileReader; import parquet.hadoop.ParquetInputFormat; import parquet.hadoop.ParquetInputSplit; @@ -83,6 +88,8 @@ public ParquetRecordReaderWrapper( taskAttemptID = new TaskAttemptID(); } + setFilter(oldJobConf); + // create a TaskInputOutputContext final TaskAttemptContext taskContext = ContextUtil.newTaskAttemptContext(oldJobConf, taskAttemptID); @@ -110,6 +117,27 @@ public ParquetRecordReaderWrapper( } } + public void setFilter(final JobConf conf) { + String serializedPushdown = conf.get(TableScanDesc.FILTER_EXPR_CONF_STR); + String columnNamesString = + conf.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR); + if (serializedPushdown == null || columnNamesString == null || serializedPushdown.isEmpty() || + columnNamesString.isEmpty()) { + return; + } + + FilterPredicate p = + SearchArgumentFactory.create(Utilities.deserializeExpression(serializedPushdown)) + .toFilterPredicate(); + if (p != null) { + LOG.debug("Predicate filter for parquet is " + p.toString()); + ParquetInputFormat.setFilterPredicate(conf, p); + } else { + LOG.debug("No predicate filter can be generated for " + TableScanDesc.FILTER_EXPR_CONF_STR + + " with the value of " + serializedPushdown); + } + } + @Override public void close() 
throws IOException { if (realReader != null) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java index c57dd99..4effe73 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java @@ -229,14 +229,11 @@ private ArrayWritable createArray(final Object obj, final ListObjectInspector in final List array = new ArrayList(); if (sourceArray != null) { for (final Object curObj : sourceArray) { - final Writable newObj = createObject(curObj, subInspector); - if (newObj != null) { - array.add(newObj); - } + array.add(createObject(curObj, subInspector)); } } if (array.size() > 0) { - final ArrayWritable subArray = new ArrayWritable(array.get(0).getClass(), + final ArrayWritable subArray = new ArrayWritable(Writable.class, array.toArray(new Writable[array.size()])); return new ArrayWritable(Writable.class, new Writable[] {subArray}); } else { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java index c7078ef..8f58c65 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java @@ -15,6 +15,8 @@ import java.sql.Timestamp; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTime; import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTimeUtils; import org.apache.hadoop.hive.serde2.io.ByteWritable; @@ -30,10 +32,10 @@ import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Writable; -import parquet.io.ParquetEncodingException; import parquet.io.api.Binary; import parquet.io.api.RecordConsumer; import parquet.schema.GroupType; +import parquet.schema.OriginalType; import parquet.schema.Type; /** @@ -41,10 +43,10 @@ * DataWritableWriter is a writer, * that will read an ArrayWritable and give the data to parquet * with the expected schema - * + * This is a helper class used by DataWritableWriteSupport class. */ public class DataWritableWriter { - + private static final Log LOG = LogFactory.getLog(DataWritableWriter.class); private final RecordConsumer recordConsumer; private final GroupType schema; @@ -53,85 +55,156 @@ public DataWritableWriter(final RecordConsumer recordConsumer, final GroupType s this.schema = schema; } - public void write(final ArrayWritable arr) { - if (arr == null) { - return; + /** + * It writes all record values to the Parquet RecordConsumer. 
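+ * A malformed record aborts the write: the error is logged and rethrown as a
+ * RuntimeException before endMessage() is reached.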
+ * @param record Contains the record of values that are going to be written + */ + public void write(final ArrayWritable record) { + if (record != null) { + recordConsumer.startMessage(); + try { + writeGroupFields(record, schema); + } catch (RuntimeException e) { + String errorMessage = "Parquet record is malformed: " + e.getMessage(); + LOG.error(errorMessage); + throw new RuntimeException(errorMessage, e); // keep the original failure as the cause + } + recordConsumer.endMessage(); } - recordConsumer.startMessage(); - writeData(arr, schema); - recordConsumer.endMessage(); } - private void writeData(final ArrayWritable arr, final GroupType type) { - if (arr == null) { - return; - } - final int fieldCount = type.getFieldCount(); - Writable[] values = arr.get(); - for (int field = 0; field < fieldCount; ++field) { - final Type fieldType = type.getType(field); - final String fieldName = fieldType.getName(); - final Writable value = values[field]; - if (value == null) { - continue; + /** + * It writes all the fields contained inside a group to the RecordConsumer. + * @param value The list of values contained in the group. + * @param type Type that contains information about the group schema. + */ + public void writeGroupFields(final ArrayWritable value, final GroupType type) { + if (value != null) { + for (int i = 0; i < type.getFieldCount(); i++) { + Type fieldType = type.getType(i); + String fieldName = fieldType.getName(); + Writable fieldValue = value.get()[i]; + + // Parquet does not write null elements + if (fieldValue != null) { + recordConsumer.startField(fieldName, i); + writeValue(fieldValue, fieldType); + recordConsumer.endField(fieldName, i); + } } + } + } - recordConsumer.startField(fieldName, field); + /** + * It writes the field value to the Parquet RecordConsumer. It detects the field type and calls + * the correct write function. + * @param value The writable object that contains the value. + * @param type Type that contains information about the type schema. + */ + private void writeValue(final Writable value, final Type type) { + if (type.isPrimitive()) { + writePrimitive(value); + } else if (value instanceof ArrayWritable) { + GroupType groupType = type.asGroupType(); + OriginalType originalType = type.getOriginalType(); - if (fieldType.isPrimitive()) { - writePrimitive(value); + if (originalType != null && originalType.equals(OriginalType.LIST)) { + writeArray((ArrayWritable)value, groupType); + } else if (originalType != null && originalType.equals(OriginalType.MAP)) { + writeMap((ArrayWritable)value, groupType); } else { - recordConsumer.startGroup(); - if (value instanceof ArrayWritable) { - if (fieldType.asGroupType().getRepetition().equals(Type.Repetition.REPEATED)) { - writeArray((ArrayWritable) value, fieldType.asGroupType()); - } else { - writeData((ArrayWritable) value, fieldType.asGroupType()); - } - } else if (value != null) { - throw new ParquetEncodingException("This should be an ArrayWritable or MapWritable: " + value); - } - - recordConsumer.endGroup(); + writeGroup((ArrayWritable) value, groupType); } + } else { + throw new RuntimeException("Field value is not an ArrayWritable object: " + type); + } + } - recordConsumer.endField(fieldName, field); + /** + * It writes a group type and all its values to the Parquet RecordConsumer. + * This is used only for optional and required groups. 
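+ * LIST and MAP groups are routed to writeArray() and writeMap() by writeValue() instead.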
+ * @param value ArrayWritable object that contains the group values + * @param type Type that contains information about the group schema + */ + private void writeGroup(final ArrayWritable value, final GroupType type) { + recordConsumer.startGroup(); + writeGroupFields(value, type); + recordConsumer.endGroup(); + } + + /** + * It writes a map type and its key-value pairs to the Parquet RecordConsumer. + * This is called when the original type (MAP) is detected by writeValue() + * @param value The list of map values that contains the repeated key-value group type + * @param type Type that contains information about the group schema + */ + private void writeMap(final ArrayWritable value, final GroupType type) { + GroupType repeatedType = type.getType(0).asGroupType(); + ArrayWritable repeatedValue = (ArrayWritable)value.get()[0]; + + recordConsumer.startGroup(); + recordConsumer.startField(repeatedType.getName(), 0); + + Writable[] map_values = repeatedValue.get(); + for (int record = 0; record < map_values.length; record++) { + Writable key_value_pair = map_values[record]; + if (key_value_pair != null) { + // Hive wraps each map key-value pair into an ArrayWritable + if (key_value_pair instanceof ArrayWritable) { + writeGroup((ArrayWritable)key_value_pair, repeatedType); + } else { + throw new RuntimeException("Map key-value pair is not an ArrayWritable object on record " + record); + } + } else { + throw new RuntimeException("Map key-value pair is null on record " + record); + } } + + recordConsumer.endField(repeatedType.getName(), 0); + recordConsumer.endGroup(); } + /** + * It writes a list type and its array elements to the Parquet RecordConsumer. + * This is called when the original type (LIST) is detected by writeValue() + * @param array The list of array values that contains the repeated array group type + * @param type Type that contains information about the group schema + */ private void writeArray(final ArrayWritable array, final GroupType type) { - if (array == null) { - return; - } - final Writable[] subValues = array.get(); - final int fieldCount = type.getFieldCount(); - for (int field = 0; field < fieldCount; ++field) { - final Type subType = type.getType(field); - recordConsumer.startField(subType.getName(), field); - for (int i = 0; i < subValues.length; ++i) { - final Writable subValue = subValues[i]; - if (subValue != null) { - if (subType.isPrimitive()) { - if (subValue instanceof ArrayWritable) { - writePrimitive(((ArrayWritable) subValue).get()[field]);// 0 ? 
- } else { - writePrimitive(subValue); - } - } else { - if (!(subValue instanceof ArrayWritable)) { - throw new RuntimeException("This should be a ArrayWritable: " + subValue); - } else { - recordConsumer.startGroup(); - writeData((ArrayWritable) subValue, subType.asGroupType()); - recordConsumer.endGroup(); - } - } + GroupType repeatedType = type.getType(0).asGroupType(); + ArrayWritable repeatedValue = (ArrayWritable)array.get()[0]; + + recordConsumer.startGroup(); + recordConsumer.startField(repeatedType.getName(), 0); + + Writable[] array_values = repeatedValue.get(); + for (int record = 0; record < array_values.length; record++) { + recordConsumer.startGroup(); + + // Null values must be wrapped into startGroup/endGroup + Writable element = array_values[record]; + if (element != null) { + for (int i = 0; i < type.getFieldCount(); i++) { + Type fieldType = repeatedType.getType(i); + String fieldName = fieldType.getName(); + + recordConsumer.startField(fieldName, i); + writeValue(element, fieldType); + recordConsumer.endField(fieldName, i); } } - recordConsumer.endField(subType.getName(), field); + + recordConsumer.endGroup(); } + + recordConsumer.endField(repeatedType.getName(), 0); + recordConsumer.endGroup(); } + /** + * It writes the primitive value to the Parquet RecordConsumer. + * @param value The writable object that contains the primitive value. + */ private void writePrimitive(final Writable value) { if (value == null) { return; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanTask.java b/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanTask.java index 75e83b8..57a9bcc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanTask.java @@ -42,6 +42,7 @@ import org.apache.hadoop.hive.ql.exec.mr.HadoopJobExecHook; import org.apache.hadoop.hive.ql.exec.mr.Throttle; import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; +import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils; import org.apache.hadoop.hive.ql.io.HiveOutputFormatImpl; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.MapredWork; @@ -102,7 +103,7 @@ public int execute(DriverContext driverContext) { HiveConf.setVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT, CombineHiveInputFormat.class.getName()); success = true; - ShimLoader.getHadoopShims().prepareJobOutput(job); + HiveFileFormatUtils.prepareJobOutput(job); job.setOutputFormat(HiveOutputFormatImpl.class); job.setMapperClass(work.getMapperClass()); @@ -140,9 +141,6 @@ public int execute(DriverContext driverContext) { } String inpFormat = HiveConf.getVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT); - if ((inpFormat == null) || (!StringUtils.isNotBlank(inpFormat))) { - inpFormat = ShimLoader.getHadoopShims().getInputFormatClassName(); - } LOG.info("Using " + inpFormat); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateTask.java b/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateTask.java index 51a2cc6..65785fe 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateTask.java @@ -34,6 +34,7 @@ import org.apache.hadoop.hive.ql.exec.mr.HadoopJobExecHook; import org.apache.hadoop.hive.ql.exec.mr.Throttle; import org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; +import 
org.apache.hadoop.hive.ql.io.HiveFileFormatUtils; import org.apache.hadoop.hive.ql.io.HiveOutputFormatImpl; import org.apache.hadoop.hive.ql.plan.MapredWork; import org.apache.hadoop.hive.ql.plan.api.StageType; @@ -79,7 +80,7 @@ public int execute(DriverContext driverContext) { HiveConf.setVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT, BucketizedHiveInputFormat.class.getName()); success = true; - ShimLoader.getHadoopShims().prepareJobOutput(job); + HiveFileFormatUtils.prepareJobOutput(job); job.setOutputFormat(HiveOutputFormatImpl.class); job.setMapperClass(work.getMapperClass()); @@ -117,10 +118,6 @@ public int execute(DriverContext driverContext) { } String inpFormat = HiveConf.getVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT); - if ((inpFormat == null) || (!StringUtils.isNotBlank(inpFormat))) { - inpFormat = ShimLoader.getHadoopShims().getInputFormatClassName(); - } - LOG.info("Using " + inpFormat); try { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java index eeb9641..f2f311f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java @@ -18,6 +18,10 @@ package org.apache.hadoop.hive.ql.io.sarg; +import com.esotericsoftware.kryo.Kryo; +import com.esotericsoftware.kryo.io.Input; +import com.esotericsoftware.kryo.io.Output; + import java.math.BigDecimal; import java.sql.Timestamp; import java.util.ArrayDeque; @@ -30,9 +34,13 @@ import org.apache.commons.codec.binary.Base64; import org.apache.commons.lang.StringUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.ql.io.parquet.FilterPredicateLeafBuilder; +import org.apache.hadoop.hive.ql.io.parquet.LeafFilterFactory; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; @@ -56,14 +64,14 @@ import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import com.esotericsoftware.kryo.Kryo; -import com.esotericsoftware.kryo.io.Input; -import com.esotericsoftware.kryo.io.Output; +import parquet.filter2.predicate.FilterApi; +import parquet.filter2.predicate.FilterPredicate; /** * The implementation of SearchArguments. 
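* As of this patch it can also translate the predicate tree into a Parquet
* FilterPredicate via toFilterPredicate(); leaves that cannot be translated
* come back as null and are dropped from AND/OR.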
*/ final class SearchArgumentImpl implements SearchArgument { + public static final Log LOG = LogFactory.getLog(SearchArgumentImpl.class); static final class PredicateLeafImpl implements PredicateLeaf { private final Operator operator; @@ -98,7 +106,7 @@ public Operator getOperator() { } @Override - public Type getType() { + public Type getType(){ return type; } @@ -108,18 +116,55 @@ public String getColumnName() { } @Override - public Object getLiteral() { + public Object getLiteral(FileFormat format) { // To get around a kryo 2.22 bug while deserialize a Timestamp into Date // (https://github.com/EsotericSoftware/kryo/issues/88) // When we see a Date, convert back into Timestamp if (literal instanceof java.util.Date) { - return new Timestamp(((java.util.Date)literal).getTime()); + return new Timestamp(((java.util.Date) literal).getTime()); + } + + switch (format) { + case ORC: + // adapt base type to what orc needs + if (literal instanceof Integer) { + return Long.valueOf(literal.toString()); + } + return literal; + case PARQUET: + return literal; + default: + throw new RuntimeException( + "File format " + format + " is not supported to build search arguments"); } - return literal; } @Override - public List getLiteralList() { + public List getLiteralList(FileFormat format) { + switch (format) { + case ORC: + return getOrcLiteralList(); + case PARQUET: + return getParquetLiteralList(); + default: + throw new RuntimeException("File format is not supported to build search arguments"); + } + } + + private List getOrcLiteralList() { + // no need to cast + if (literalList == null || literalList.size() == 0 || !(literalList.get(0) instanceof + Integer)) { + return literalList; + } + List result = new ArrayList(); + for (Object o : literalList) { + result.add(Long.valueOf(o.toString())); + } + return result; + } + + private List getParquetLiteralList() { return literalList; } @@ -254,6 +299,76 @@ TruthValue evaluate(TruthValue[] leaves) { } } + FilterPredicate translate(List leafs) { + FilterPredicate p = null; + switch (operator) { + case OR: + for (ExpressionTree child : children) { + if (p == null) { + p = child.translate(leafs); + } else { + FilterPredicate right = child.translate(leafs); + // constant means no filter, ignore it when it is null + if (right != null) { + p = FilterApi.or(p, right); + } + } + } + return p; + case AND: + for (ExpressionTree child : children) { + if (p == null) { + p = child.translate(leafs); + } else { + FilterPredicate right = child.translate(leafs); + // constant means no filter, ignore it when it is null + if (right != null) { + p = FilterApi.and(p, right); + } + } + } + return p; + case NOT: + FilterPredicate op = children.get(0).translate(leafs); + if (op != null) { + return FilterApi.not(op); + } else { + return null; + } + case LEAF: + return buildFilterPredicateFromPredicateLeaf(leafs.get(leaf)); + case CONSTANT: + return null; // no filter will be executed for a constant + default: + throw new IllegalStateException("Unknown operator: " + operator); + } + } + + private FilterPredicate buildFilterPredicateFromPredicateLeaf(PredicateLeaf leaf) { + LeafFilterFactory leafFilterFactory = new LeafFilterFactory(); + FilterPredicateLeafBuilder builder; + try { + builder = leafFilterFactory + .getLeafFilterBuilderByType(leaf.getType()); + if (builder == null) return null; + if (isMultiLiteralsOperator(leaf.getOperator())) { + return builder.buildPredicate(leaf.getOperator(), leaf.getLiteralList( + PredicateLeaf.FileFormat.PARQUET), leaf.getColumnName()); + } else { + return
builder + .buildPredict(leaf.getOperator(), leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET), + leaf.getColumnName()); + } + } catch (Exception e) { + LOG.error("Failed to build predicate filter leaf with errors: " + e, e); + return null; + } + } + + private boolean isMultiLiteralsOperator(PredicateLeaf.Operator op) { + return (op == PredicateLeaf.Operator.IN) || (op == PredicateLeaf.Operator.BETWEEN); + } + @Override public String toString() { StringBuilder buffer = new StringBuilder(); @@ -314,8 +429,9 @@ Operator getOperator() { case BYTE: case SHORT: case INT: - case LONG: return PredicateLeaf.Type.INTEGER; + case LONG: + return PredicateLeaf.Type.LONG; case CHAR: case VARCHAR: case STRING: @@ -360,6 +476,8 @@ private static String getColumnName(ExprNodeGenericFuncDesc expr, private static Object boxLiteral(ExprNodeConstantDesc lit) { switch (getType(lit)) { case INTEGER: + return ((Number) lit.getValue()).intValue(); + case LONG: return ((Number) lit.getValue()).longValue(); case STRING: return StringUtils.stripEnd(lit.getValue().toString(), null); @@ -420,6 +538,7 @@ private ExpressionTree createLeaf(PredicateLeaf.Operator operator, if (type == null) { return new ExpressionTree(TruthValue.YES_NO_NULL); } + Object literal = null; List literalList = null; switch (operator) { @@ -903,6 +1022,11 @@ static SearchArgument fromKryo(String value) { return new Kryo().readObject(input, SearchArgumentImpl.class); } + @Override + public FilterPredicate toFilterPredicate() { + return expression.translate(leaves); + } + private static class BuilderImpl implements Builder { private final Deque currentTree = new ArrayDeque(); @@ -987,10 +1111,11 @@ private static Object boxLiteral(Object literal) { private static PredicateLeaf.Type getType(Object literal) { if (literal instanceof Byte || literal instanceof Short || - literal instanceof Integer || - literal instanceof Long) { return PredicateLeaf.Type.INTEGER; - } else if (literal instanceof HiveChar || + } else if (literal instanceof Long) { + return PredicateLeaf.Type.LONG; + } else if (literal instanceof HiveChar || literal instanceof HiveVarchar || literal instanceof String) { return PredicateLeaf.Type.STRING; @@ -1005,7 +1130,7 @@ private static Object boxLiteral(Object literal) { literal instanceof BigDecimal) { return PredicateLeaf.Type.DECIMAL; } else if (literal instanceof Boolean) { - return PredicateLeaf.Type.BOOLEAN; + return PredicateLeaf.Type.BOOLEAN; } throw new IllegalArgumentException("Unknown type for literal " + literal); } @@ -1069,6 +1194,7 @@ public Builder in(String column, Object... 
literal) { for(Object lit: literal){ argList.add(boxLiteral(lit)); } + PredicateLeaf leaf = new PredicateLeafImpl(PredicateLeaf.Operator.IN, getType(argList.get(0)), column, null, argList); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index ee8d295..cd3d349 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -443,6 +443,11 @@ public void createTable(String tableName, List columns, List par */ public void alterTable(String tblName, Table newTbl) throws InvalidOperationException, HiveException { + alterTable(tblName, newTbl, false); + } + + public void alterTable(String tblName, Table newTbl, boolean cascade) + throws InvalidOperationException, HiveException { String[] names = Utilities.getDbTableName(tblName); try { // Remove the DDL_TIME so it gets refreshed @@ -450,7 +455,7 @@ public void alterTable(String tblName, Table newTbl) newTbl.getParameters().remove(hive_metastoreConstants.DDL_TIME); } newTbl.checkValidity(); - getMSC().alter_table(names[0], names[1], newTbl.getTTable()); + getMSC().alter_table(names[0], names[1], newTbl.getTTable(), cascade); } catch (MetaException e) { throw new HiveException("Unable to alter table. " + e.getMessage(), e); } catch (TException e) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java index 13277a9..bafaff8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java @@ -32,7 +32,6 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.common.JavaUtils; import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.metastore.ProtectMode; import org.apache.hadoop.hive.metastore.Warehouse; @@ -43,9 +42,9 @@ import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils; import org.apache.hadoop.hive.ql.io.HiveOutputFormat; -import org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat; import org.apache.hadoop.hive.serde2.Deserializer; import org.apache.hadoop.mapred.InputFormat; +import org.apache.hadoop.mapred.OutputFormat; import org.apache.thrift.TException; import org.apache.thrift.protocol.TBinaryProtocol; import org.apache.thrift.transport.TMemoryBuffer; @@ -70,7 +69,7 @@ * These fields are cached. The information comes from tPartition. 
*/ private Deserializer deserializer; - private Class outputFormatClass; + private Class outputFormatClass; private Class inputFormatClass; /** @@ -186,16 +185,14 @@ protected void initialize(Table table, return; } - String partName = ""; if (table.isPartitioned()) { try { - partName = Warehouse.makePartName(table.getPartCols(), tPartition.getValues()); + String partName = Warehouse.makePartName(table.getPartCols(), tPartition.getValues()); if (tPartition.getSd().getLocation() == null) { // set default if location is not set and this is a physical // table partition (not a view partition) if (table.getDataLocation() != null) { - Path partPath = new Path( - table.getDataLocation().toString(), partName); + Path partPath = new Path(table.getDataLocation(), partName); tPartition.getSd().setLocation(partPath.toString()); } } @@ -287,18 +284,16 @@ public void setInputFormatClass(Class inputFormatClass) { public void setOutputFormatClass(Class outputFormatClass) { this.outputFormatClass = outputFormatClass; tPartition.getSd().setOutputFormat(HiveFileFormatUtils - .getOutputFormatSubstitute(outputFormatClass, false).toString()); + .getOutputFormatSubstitute(outputFormatClass).toString()); } final public Class getInputFormatClass() throws HiveException { if (inputFormatClass == null) { - String clsName = null; - if (tPartition != null && tPartition.getSd() != null) { - clsName = tPartition.getSd().getInputFormat(); - } + // sd can be null for views + String clsName = tPartition.getSd() == null ? null : tPartition.getSd().getInputFormat(); if (clsName == null) { - clsName = org.apache.hadoop.mapred.SequenceFileInputFormat.class.getName(); + return inputFormatClass = table.getInputFormatClass(); } try { inputFormatClass = ((Class) Class.forName(clsName, true, @@ -310,25 +305,18 @@ public void setOutputFormatClass(Class outputFormatC return inputFormatClass; } - final public Class getOutputFormatClass() + final public Class getOutputFormatClass() throws HiveException { if (outputFormatClass == null) { - String clsName = null; - if (tPartition != null && tPartition.getSd() != null) { - clsName = tPartition.getSd().getOutputFormat(); - } + // sd can be null for views + String clsName = tPartition.getSd() == null ? 
null : tPartition.getSd().getOutputFormat(); if (clsName == null) { - clsName = HiveSequenceFileOutputFormat.class.getName(); + return outputFormatClass = table.getOutputFormatClass(); } try { - Class c = (Class.forName(clsName, true, - Utilities.getSessionSpecifiedClassLoader())); + Class c = Class.forName(clsName, true, Utilities.getSessionSpecifiedClassLoader()); // Replace FileOutputFormat for backward compatibility - if (!HiveOutputFormat.class.isAssignableFrom(c)) { - outputFormatClass = HiveFileFormatUtils.getOutputFormatSubstitute(c,false); - } else { - outputFormatClass = (Class)c; - } + outputFormatClass = HiveFileFormatUtils.getOutputFormatSubstitute(c); } catch (ClassNotFoundException e) { throw new HiveException("Class not found: " + clsName, e); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java index f1f723c..ce5b284 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java @@ -227,6 +227,18 @@ public boolean tableExists(String databaseName, String tableName) throws MetaExc @Override public void alter_table(String dbname, String tbl_name, org.apache.hadoop.hive.metastore.api.Table new_tbl, + boolean cascade) throws InvalidOperationException, MetaException, TException { + org.apache.hadoop.hive.metastore.api.Table old_tbl = getTempTable(dbname, tbl_name); + if (old_tbl != null) { + //actually temp table does not support partitions, cascade is not applicable here + alterTempTable(dbname, tbl_name, old_tbl, new_tbl, null); + return; + } + super.alter_table(dbname, tbl_name, new_tbl, cascade); + } + + @Override + public void alter_table(String dbname, String tbl_name, org.apache.hadoop.hive.metastore.api.Table new_tbl, EnvironmentContext envContext) throws InvalidOperationException, MetaException, TException { // First try temp table org.apache.hadoop.hive.metastore.api.Table old_tbl = getTempTable(dbname, tbl_name); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java index 211ab6c..35d60f8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java @@ -49,8 +49,6 @@ import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils; -import org.apache.hadoop.hive.ql.io.HiveOutputFormat; -import org.apache.hadoop.hive.ql.io.HivePassThroughOutputFormat; import org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat; import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer; import org.apache.hadoop.hive.ql.parse.SemanticException; @@ -64,6 +62,7 @@ import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.WritableComparable; import org.apache.hadoop.mapred.InputFormat; +import org.apache.hadoop.mapred.OutputFormat; import org.apache.hadoop.mapred.SequenceFileInputFormat; /** @@ -85,10 +84,11 @@ * These fields are all cached fields. The information comes from tTable. */ private Deserializer deserializer; - private Class outputFormatClass; + private Class outputFormatClass; private Class inputFormatClass; private Path path; - private HiveStorageHandler storageHandler; + + private transient HiveStorageHandler storageHandler; /** * Used only for serialization. 
@@ -222,7 +222,7 @@ public void setInputFormatClass(Class inputFormatClass) { tTable.getSd().setInputFormat(inputFormatClass.getName()); } - public void setOutputFormatClass(Class outputFormatClass) { + public void setOutputFormatClass(Class outputFormatClass) { this.outputFormatClass = outputFormatClass; tTable.getSd().setOutputFormat(outputFormatClass.getName()); } @@ -279,7 +279,7 @@ final public Deserializer getDeserializerFromMetaStore(boolean skipConfError) { } public HiveStorageHandler getStorageHandler() { - if (storageHandler != null) { + if (storageHandler != null || !isNonNative()) { return storageHandler; } try { @@ -313,9 +313,7 @@ public HiveStorageHandler getStorageHandler() { return inputFormatClass; } - final public Class getOutputFormatClass() { - // Replace FileOutputFormat for backward compatibility - boolean storagehandler = false; + final public Class getOutputFormatClass() { if (outputFormatClass == null) { try { String className = tTable.getSd().getOutputFormat(); @@ -326,34 +324,10 @@ public HiveStorageHandler getStorageHandler() { } c = getStorageHandler().getOutputFormatClass(); } else { - // if HivePassThroughOutputFormat - if (className.equals( - HivePassThroughOutputFormat.HIVE_PASSTHROUGH_OF_CLASSNAME)) { - if (getStorageHandler() != null) { - // get the storage handler real output format class - c = getStorageHandler().getOutputFormatClass(); - } - else { - //should not happen - return null; - } - } - else { - c = Class.forName(className, true, - Utilities.getSessionSpecifiedClassLoader()); - } - } - if (!HiveOutputFormat.class.isAssignableFrom(c)) { - if (getStorageHandler() != null) { - storagehandler = true; - } - else { - storagehandler = false; - } - outputFormatClass = HiveFileFormatUtils.getOutputFormatSubstitute(c,storagehandler); - } else { - outputFormatClass = (Class)c; + c = Class.forName(className, true, Utilities.getSessionSpecifiedClassLoader()); } + // Replace FileOutputFormat for backward compatibility + outputFormatClass = HiveFileFormatUtils.getOutputFormatSubstitute(c); } catch (ClassNotFoundException e) { throw new RuntimeException(e); } @@ -708,8 +682,7 @@ public void setOutputFormatClass(String name) throws HiveException { } try { Class origin = Class.forName(name, true, Utilities.getSessionSpecifiedClassLoader()); - setOutputFormatClass(HiveFileFormatUtils - .getOutputFormatSubstitute(origin,false)); + setOutputFormatClass(HiveFileFormatUtils.getOutputFormatSubstitute(origin)); } catch (ClassNotFoundException e) { throw new HiveException("Class not found: " + name, e); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java index fee9bcf..5d848a1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java @@ -23,6 +23,7 @@ import java.util.List; import java.util.Map; +import org.apache.hadoop.hive.ql.exec.ColumnInfo; import org.apache.hadoop.hive.ql.exec.CommonJoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.SelectOperator; @@ -128,9 +129,16 @@ public ParseContext getParseContext() { public List getColsFromSelectExpr(SelectOperator op) { List cols = new ArrayList(); SelectDesc conf = op.getConf(); - List exprList = conf.getColList(); - for (ExprNodeDesc expr : exprList) { - cols = Utilities.mergeUniqElems(cols, expr.getCols()); + if(conf.isSelStarNoCompute()) { + for 
(ColumnInfo colInfo : op.getSchema().getSignature()) { + cols.add(colInfo.getInternalName()); + } + } + else { + List exprList = conf.getColList(); + for (ExprNodeDesc expr : exprList) { + cols = Utilities.mergeUniqElems(cols, expr.getCols()); + } } return cols; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java index 2dc66f7..afd1738 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java @@ -386,12 +386,14 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx ctx, scanOp.setNeededColumnIDs(null); return null; } + cols = cols == null ? new ArrayList() : cols; cppCtx.getPrunedColLists().put((Operator) nd, cols); RowResolver inputRR = cppCtx.getOpToParseCtxMap().get(scanOp).getRowResolver(); setupNeededColumns(scanOp, inputRR, cols); + return null; } } @@ -569,7 +571,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx ctx, // following SEL will do CP for columns from UDTF, not adding SEL in here newColNames.addAll(outputCols.subList(numSelColumns, outputCols.size())); op.getConf().setOutputInternalColNames(newColNames); - + pruneOperator(ctx, op, newColNames); cppCtx.getPrunedColLists().put(op, colsAfterReplacement); return null; } @@ -607,6 +609,12 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx ctx, ((SelectDesc)select.getConf()).setSelStarNoCompute(false); ((SelectDesc)select.getConf()).setColList(colList); ((SelectDesc)select.getConf()).setOutputColumnNames(outputColNames); + pruneOperator(ctx, select, outputColNames); + + Operator udtfPath = op.getChildOperators().get(LateralViewJoinOperator.UDTF_TAG); + List lvFCols = new ArrayList(cppCtx.getPrunedColLists().get(udtfPath)); + lvFCols = Utilities.mergeUniqElems(lvFCols, outputColNames); + pruneOperator(ctx, op, lvFCols); } return null; } @@ -874,12 +882,16 @@ private static void pruneOperator(NodeProcessorCtx ctx, RowSchema inputSchema = op.getSchema(); if (inputSchema != null) { ArrayList rs = new ArrayList(); - ArrayList inputCols = inputSchema.getSignature(); - for (ColumnInfo i: inputCols) { - if (cols.contains(i.getInternalName())) { + RowResolver oldRR = ((ColumnPrunerProcCtx)ctx).getOpToParseCtxMap().get(op).getRowResolver(); + RowResolver newRR = new RowResolver(); + for(ColumnInfo i : oldRR.getRowSchema().getSignature()) { + if ( cols.contains(i.getInternalName())) { + String[] nm = oldRR.reverseLookup(i.getInternalName()); + newRR.put(nm[0], nm[1], i); rs.add(i); } } + ((ColumnPrunerProcCtx)ctx).getOpToParseCtxMap().get(op).setRowResolver(newRR); op.getSchema().setSignature(rs); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IdentityProjectRemover.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IdentityProjectRemover.java new file mode 100644 index 0000000..60ef9dd --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IdentityProjectRemover.java @@ -0,0 +1,101 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.optimizer; + +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Stack; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.exec.LateralViewForwardOperator; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.SelectOperator; +import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; +import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; +import org.apache.hadoop.hive.ql.lib.GraphWalker; +import org.apache.hadoop.hive.ql.lib.Node; +import org.apache.hadoop.hive.ql.lib.NodeProcessor; +import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; +import org.apache.hadoop.hive.ql.lib.Rule; +import org.apache.hadoop.hive.ql.lib.RuleRegExp; +import org.apache.hadoop.hive.ql.parse.ParseContext; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; + +/** This optimization tries to remove a {@link SelectOperator} from the tree when it doesn't do any + * processing except forwarding columns from its parent to its children. + * e.g., select * from (select * from src where key = value) t1 join (select * from src where key = value) t2; + * Query tree + * + * Without this optimization: + * + * TS -> FIL -> SEL -> RS -> + * JOIN -> SEL -> FS + * TS -> FIL -> SEL -> RS -> + * + * With this optimization + * + * TS -> FIL -> RS -> + * JOIN -> FS + * TS -> FIL -> RS -> + * + * Note the absence of the select operator after the filter and after the join operator. + * Also, see : identity_proj_remove.q + */ +public class IdentityProjectRemover implements Transform { + + private static final Log LOG = LogFactory.getLog(IdentityProjectRemover.class); + @Override + public ParseContext transform(ParseContext pctx) throws SemanticException { + Map opRules = new LinkedHashMap(); + opRules.put(new RuleRegExp("R1", + "(" + SelectOperator.getOperatorName() + "%)"), new ProjectRemover()); + GraphWalker ogw = new DefaultGraphWalker(new DefaultRuleDispatcher(null, opRules, null)); + ArrayList topNodes = new ArrayList(); + topNodes.addAll(pctx.getTopOps().values()); + ogw.startWalking(topNodes, null); + return pctx; + } + + private static class ProjectRemover implements NodeProcessor { + + @Override + public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + + SelectOperator sel = (SelectOperator)nd; + List<Operator<? extends OperatorDesc>> parents = sel.getParentOperators(); + if (parents.size() != 1 || parents.get(0) instanceof LateralViewForwardOperator) { + // Multiple parents; can't handle that. + // Right now, we do not remove projection on top of + // LateralViewForward operators. 
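+ // Such a SELECT is part of the lateral view plumbing between
+ // LateralViewForward and LateralViewJoin, so it is left in place.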
+ return null; + } + Operator parent = parents.get(0); + if (sel.isIdentitySelect()) { + parent.removeChildAndAdoptItsChildren(sel); + LOG.debug("Identity project remover optimization removed: " + sel); + } + return null; + } + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java index 7ea0061..a8a3d86 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java @@ -135,6 +135,9 @@ public void initialize(HiveConf hiveConf) { transformations.add(new ReduceSinkDeDuplication()); } transformations.add(new NonBlockingOpDeDupProc()); + if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEIDENTITYPROJECTREMOVER)) { + transformations.add(new IdentityProjectRemover()); + } if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVELIMITOPTENABLE)) { transformations.add(new GlobalLimitOptimizer()); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java index 906dadf..b5a5a88 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java @@ -59,6 +59,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeNullDesc; import org.apache.hadoop.hive.ql.plan.FetchWork; import org.apache.hadoop.hive.ql.plan.ListSinkDesc; import org.apache.hadoop.hive.ql.plan.OperatorDesc; @@ -244,7 +245,9 @@ private boolean checkExpressions(SelectOperator op) { } private boolean checkExpression(ExprNodeDesc expr) { - if (expr instanceof ExprNodeConstantDesc || expr instanceof ExprNodeColumnDesc) { + if (expr instanceof ExprNodeConstantDesc || + expr instanceof ExprNodeNullDesc || + expr instanceof ExprNodeColumnDesc) { return true; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/JoinCondTypeCheckProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/JoinCondTypeCheckProcFactory.java index 38156c6..89c57b9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/JoinCondTypeCheckProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/JoinCondTypeCheckProcFactory.java @@ -103,7 +103,8 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, } else if (hasTableAlias(ctx, tableOrCol, expr)) { return null; } else { - throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(expr)); + // Qualified column access for which table was not found + throw new SemanticException(ErrorMsg.INVALID_TABLE_ALIAS.getMsg(expr)); } } @@ -143,7 +144,7 @@ private static ColumnInfo getColInfo(JoinTypeCheckCtx ctx, String tabName, Strin /** * Factory method to get ColumnExprProcessor. - * + * * @return ColumnExprProcessor. */ @Override @@ -305,7 +306,7 @@ private static boolean comparisonUDFargsRefersToBothInput(GenericUDFBaseCompare /** * Factory method to get DefaultExprProcessor. - * + * * @return DefaultExprProcessor. 
*/ @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/SqlFunctionConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/SqlFunctionConverter.java index 7f52c29..0994c95 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/SqlFunctionConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/SqlFunctionConverter.java @@ -32,6 +32,7 @@ import org.apache.hadoop.hive.ql.parse.ASTNode; import org.apache.hadoop.hive.ql.parse.HiveParser; import org.apache.hadoop.hive.ql.parse.ParseDriver; +import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.udf.SettableUDF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge; @@ -111,13 +112,24 @@ public static GenericUDF getHiveUDF(SqlOperator op, RelDataType dt, int argsLeng name = FunctionRegistry.UNARY_MINUS_FUNC_NAME; } } - FunctionInfo hFn = name != null ? FunctionRegistry.getFunctionInfo(name) : null; + FunctionInfo hFn; + try { + hFn = name != null ? FunctionRegistry.getFunctionInfo(name) : null; + } catch (SemanticException e) { + LOG.warn("Failed to load udf " + name, e); + hFn = null; + } if (hFn == null) - hFn = handleExplicitCast(op, dt); + try { + hFn = handleExplicitCast(op, dt); + } catch (SemanticException e) { + LOG.warn("Failed to load udf " + name, e); + hFn = null; + } return hFn == null ? null : hFn.getGenericUDF(); } - private static FunctionInfo handleExplicitCast(SqlOperator op, RelDataType dt) { + private static FunctionInfo handleExplicitCast(SqlOperator op, RelDataType dt) throws SemanticException { FunctionInfo castUDF = null; if (op.kind == SqlKind.CAST) { @@ -283,7 +295,13 @@ private static String getName(GenericUDF hiveUDF) { private void registerFunction(String name, SqlOperator optiqFn, HiveToken hiveToken) { reverseOperatorMap.put(optiqFn, name); - FunctionInfo hFn = FunctionRegistry.getFunctionInfo(name); + FunctionInfo hFn; + try { + hFn = FunctionRegistry.getFunctionInfo(name); + } catch (SemanticException e) { + LOG.warn("Failed to load udf " + name, e); + hFn = null; + } if (hFn != null) { String hFnName = getName(hFn.getGenericUDF()); hiveToOptiq.put(hFnName, optiqFn); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java index becbc38..f33abb4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java @@ -1273,7 +1273,7 @@ private static String normalizeDateCol( } else { throw new SemanticException("Unexpected date type " + colValue.getClass()); } - return HiveMetaStore.PARTITION_DATE_FORMAT.format(value); + return HiveMetaStore.PARTITION_DATE_FORMAT.get().format(value); } protected Database getDatabase(String dbName) throws SemanticException { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java index b105424..46e1c59 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java @@ -1369,20 +1369,38 @@ private void analyzeAlterTableFileFormat(ASTNode ast, String tableName, private void addInputsOutputsAlterTable(String tableName, Map partSpec) throws SemanticException { - addInputsOutputsAlterTable(tableName, 
partSpec, null); + addInputsOutputsAlterTable(tableName, partSpec, null, false); } private void addInputsOutputsAlterTable(String tableName, Map partSpec, - AlterTableDesc desc) throws SemanticException { + AlterTableDesc desc) throws SemanticException { + addInputsOutputsAlterTable(tableName, partSpec, desc, false); + } + + private void addInputsOutputsAlterTable(String tableName, Map partSpec, + AlterTableDesc desc, boolean isCascade) throws SemanticException { + boolean alterPartitions = partSpec != null && !partSpec.isEmpty(); + // cascade only occurs at the table level and then cascades to the partition level + if (isCascade && alterPartitions) { + throw new SemanticException( + ErrorMsg.ALTER_TABLE_PARTITION_CASCADE_NOT_SUPPORTED, desc.getOp().name()); + } + + Table tab = getTable(tableName, true); // Determine the lock type to acquire WriteEntity.WriteType writeType = desc == null ? WriteEntity.WriteType.DDL_EXCLUSIVE : WriteEntity.determineAlterTableWriteType(desc.getOp()); - if (partSpec == null || partSpec.isEmpty()) { + + if (!alterPartitions) { inputs.add(new ReadEntity(tab)); outputs.add(new WriteEntity(tab, writeType)); - } - else { + // do not need the lock for partitions since they are covered by the table lock + if (isCascade) { + for (Partition part : getPartitions(tab, partSpec, false)) { + outputs.add(new WriteEntity(part, WriteEntity.WriteType.DDL_NO_LOCK)); + } + } + } else { ReadEntity re = new ReadEntity(tab); // In the case of altering a table for its partitions we don't need to lock the table // itself, just the partitions. But the table will have a ReadEntity. So mark that @@ -2495,32 +2513,36 @@ private void analyzeAlterTableRename(String[] source, ASTNode ast, boolean expec private void analyzeAlterTableRenameCol(String[] qualified, ASTNode ast, HashMap partSpec) throws SemanticException { String newComment = null; - String newType = null; - newType = getTypeStringFromAST((ASTNode) ast.getChild(2)); boolean first = false; String flagCol = null; - ASTNode positionNode = null; - if (ast.getChildCount() == 5) { - newComment = unescapeSQLString(ast.getChild(3).getText()); - positionNode = (ASTNode) ast.getChild(4); - } else if (ast.getChildCount() == 4) { - if (ast.getChild(3).getType() == HiveParser.StringLiteral) { - newComment = unescapeSQLString(ast.getChild(3).getText()); - } else { - positionNode = (ASTNode) ast.getChild(3); - } - } - - if (positionNode != null) { - if (positionNode.getChildCount() == 0) { - first = true; - } else { - flagCol = unescapeIdentifier(positionNode.getChild(0).getText()); - } - } - + boolean isCascade = false; + //col_old_name col_new_name column_type [COMMENT col_comment] [FIRST|AFTER column_name] [CASCADE|RESTRICT] String oldColName = ast.getChild(0).getText(); String newColName = ast.getChild(1).getText(); + String newType = getTypeStringFromAST((ASTNode) ast.getChild(2)); + int childCount = ast.getChildCount(); + for (int i = 3; i < childCount; i++) { + ASTNode child = (ASTNode)ast.getChild(i); + switch (child.getToken().getType()) { + case HiveParser.StringLiteral: + newComment = unescapeSQLString(child.getText()); + break; + case HiveParser.TOK_ALTERTABLE_CHANGECOL_AFTER_POSITION: + flagCol = unescapeIdentifier(child.getChild(0).getText()); + break; + case HiveParser.KW_FIRST: + first = true; + break; + case HiveParser.TOK_CASCADE: + isCascade = true; + break; + case HiveParser.TOK_RESTRICT: + break; + default: + throw new SemanticException("Unsupported token: " + child.getToken() + + " for alter table"); + } + } /* Validate the operation of 
renaming a column name. */ Table tab = getTable(qualified); @@ -2536,8 +2558,8 @@ private void analyzeAlterTableRenameCol(String[] qualified, ASTNode ast, String tblName = getDotName(qualified); AlterTableDesc alterTblDesc = new AlterTableDesc(tblName, partSpec, unescapeIdentifier(oldColName), unescapeIdentifier(newColName), - newType, newComment, first, flagCol); - addInputsOutputsAlterTable(tblName, partSpec, alterTblDesc); + newType, newComment, first, flagCol, isCascade); + addInputsOutputsAlterTable(tblName, partSpec, alterTblDesc, isCascade); rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), alterTblDesc), conf)); @@ -2585,10 +2607,15 @@ private void analyzeAlterTableModifyCols(String[] qualified, ASTNode ast, String tblName = getDotName(qualified); List newCols = getColumns((ASTNode) ast.getChild(0)); + boolean isCascade = false; + if (null != ast.getFirstChildWithType(HiveParser.TOK_CASCADE)) { + isCascade = true; + } + AlterTableDesc alterTblDesc = new AlterTableDesc(tblName, partSpec, newCols, - alterType); + alterType, isCascade); - addInputsOutputsAlterTable(tblName, partSpec, alterTblDesc); + addInputsOutputsAlterTable(tblName, partSpec, alterTblDesc, isCascade); rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), alterTblDesc), conf)); } @@ -2993,9 +3020,9 @@ private void analyzeMetastoreCheck(CommonTree ast) throws SemanticException { } private static ExprNodeGenericFuncDesc makeBinaryPredicate( - String fn, ExprNodeDesc left, ExprNodeDesc right) { - return new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, - FunctionRegistry.getFunctionInfo(fn).getGenericUDF(), Lists.newArrayList(left, right)); + String fn, ExprNodeDesc left, ExprNodeDesc right) throws SemanticException { + return new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, + FunctionRegistry.getFunctionInfo(fn).getGenericUDF(), Lists.newArrayList(left, right)); } /** diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/FunctionSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/FunctionSemanticAnalyzer.java index 22e5b47..01b97c8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/FunctionSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/FunctionSemanticAnalyzer.java @@ -59,7 +59,7 @@ public void analyzeInternal(ASTNode ast) throws SemanticException { analyzeCreateFunction(ast); } if (ast.getToken().getType() == HiveParser.TOK_DROPFUNCTION) { - analyzeDropFunction(ast); + analyzeDropFunction(ast); } LOG.info("analyze done"); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java index 364e8c9..df16e96 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java @@ -18,13 +18,6 @@ package org.apache.hadoop.hive.ql.parse; -import java.util.ArrayList; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Stack; - import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.conf.HiveConf; @@ -50,6 +43,13 @@ import org.apache.hadoop.hive.ql.plan.TezWork.VertexType; import org.apache.hadoop.hive.ql.plan.UnionWork; +import java.util.ArrayList; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Stack; + /** * GenTezWork separates the operator tree into tez tasks. 
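A note on the one-line BaseSemanticAnalyzer change above: formatting now goes through PARTITION_DATE_FORMAT.get(), which only type-checks if the constant has become a ThreadLocal<DateFormat>. SimpleDateFormat keeps mutable parse state and must not be shared across threads. A minimal sketch of the pattern; the field name and date pattern are assumptions for illustration, the real constant lives in HiveMetaStore:

    import java.text.DateFormat;
    import java.text.SimpleDateFormat;
    import java.util.Date;

    // Sketch only: PARTITION_DATE_FORMAT and "yyyy-MM-dd" are stand-ins for
    // the HiveMetaStore constant, not the actual Hive declaration.
    public class PartitionDateFormatDemo {
      // SimpleDateFormat keeps internal state, so each thread gets its own copy.
      private static final ThreadLocal<DateFormat> PARTITION_DATE_FORMAT =
          new ThreadLocal<DateFormat>() {
            @Override protected DateFormat initialValue() {
              return new SimpleDateFormat("yyyy-MM-dd");
            }
          };

      public static String normalize(Date value) {
        // .get() hands back this thread's formatter, mirroring the
        // PARTITION_DATE_FORMAT.get().format(value) call in the patch.
        return PARTITION_DATE_FORMAT.get().format(value);
      }

      public static void main(String[] args) {
        System.out.println(normalize(new Date(0L))); // 1970-01-01 in UTC zones
      }
    }

Any shared static SimpleDateFormat is a latent race under concurrent compilation; the ThreadLocal trades that for one small object per thread.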
* It is called once per leaf operator (operator that forces @@ -109,10 +109,14 @@ public Object process(Node nd, Stack stack, // operator graph. There's typically two reasons for that: a) mux/demux // b) multi insert. Mux/Demux will hit the same leaf again, multi insert // will result into a vertex with multiple FS or RS operators. - - // At this point we don't have to do anything special in this case. Just - // run through the regular paces w/o creating a new task. - work = context.rootToWorkMap.get(root); + if (context.childToWorkMap.containsKey(operator)) { + // if we've seen both root and child, we can bail. + return null; + } else { + // At this point we don't have to do anything special in this case. Just + // run through the regular paces w/o creating a new task. + work = context.rootToWorkMap.get(root); + } } else { // create a new vertex if (context.preceedingWork == null) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g index f1365fa..4e98e9b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g @@ -1043,16 +1043,16 @@ alterStatementSuffixRename[boolean table] alterStatementSuffixAddCol @init { pushMsg("add column statement", state); } @after { popMsg(state); } - : (add=KW_ADD | replace=KW_REPLACE) KW_COLUMNS LPAREN columnNameTypeList RPAREN - -> {$add != null}? ^(TOK_ALTERTABLE_ADDCOLS columnNameTypeList) - -> ^(TOK_ALTERTABLE_REPLACECOLS columnNameTypeList) + : (add=KW_ADD | replace=KW_REPLACE) KW_COLUMNS LPAREN columnNameTypeList RPAREN restrictOrCascade? + -> {$add != null}? ^(TOK_ALTERTABLE_ADDCOLS columnNameTypeList restrictOrCascade?) + -> ^(TOK_ALTERTABLE_REPLACECOLS columnNameTypeList restrictOrCascade?) ; alterStatementSuffixRenameCol @init { pushMsg("rename column name", state); } @after { popMsg(state); } - : KW_CHANGE KW_COLUMN? oldName=identifier newName=identifier colType (KW_COMMENT comment=StringLiteral)? alterStatementChangeColPosition? - ->^(TOK_ALTERTABLE_RENAMECOL $oldName $newName colType $comment? alterStatementChangeColPosition?) + : KW_CHANGE KW_COLUMN? oldName=identifier newName=identifier colType (KW_COMMENT comment=StringLiteral)? alterStatementChangeColPosition? restrictOrCascade? + ->^(TOK_ALTERTABLE_RENAMECOL $oldName $newName colType $comment? alterStatementChangeColPosition? restrictOrCascade?) ; alterStatementSuffixUpdateStatsCol @@ -1316,9 +1316,9 @@ descPartTypeExpr descStatement @init { pushMsg("describe statement", state); } @after { popMsg(state); } - : (KW_DESCRIBE|KW_DESC) (descOptions=KW_FORMATTED|descOptions=KW_EXTENDED|descOptions=KW_PRETTY)? (parttype=descPartTypeExpr) -> ^(TOK_DESCTABLE $parttype $descOptions?) + : (KW_DESCRIBE|KW_DESC) (KW_DATABASE|KW_SCHEMA) KW_EXTENDED? (dbName=identifier) -> ^(TOK_DESCDATABASE $dbName KW_EXTENDED?) + | (KW_DESCRIBE|KW_DESC) (descOptions=KW_FORMATTED|descOptions=KW_EXTENDED|descOptions=KW_PRETTY)? (parttype=descPartTypeExpr) -> ^(TOK_DESCTABLE $parttype $descOptions?) | (KW_DESCRIBE|KW_DESC) KW_FUNCTION KW_EXTENDED? (name=descFuncNames) -> ^(TOK_DESCFUNCTION $name KW_EXTENDED?) - | (KW_DESCRIBE|KW_DESC) (KW_DATABASE|KW_SCHEMA) KW_EXTENDED? (dbName=identifier) -> ^(TOK_DESCDATABASE $dbName KW_EXTENDED?) 
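Both alter-column rules above now accept an optional restrictOrCascade child, and the rewritten analyzeAlterTableRenameCol earlier in the patch consumes such trailing children by looping and dispatching on token type instead of decoding fixed child positions. A reduced sketch of that dispatch style; Child and the token constants below are simplified stand-ins for the ANTLR tree node and the generated HiveParser tokens:

    import java.util.Arrays;
    import java.util.List;

    public class RenameColDispatch {
      static final int STRING_LITERAL = 1, TOK_AFTER_POSITION = 2,
          KW_FIRST = 3, TOK_CASCADE = 4, TOK_RESTRICT = 5;

      static class Child {
        final int type; final String text;
        Child(int type, String text) { this.type = type; this.text = text; }
      }

      String newComment, flagCol;
      boolean first, isCascade;

      void parseTrailing(List<Child> children) {
        for (Child child : children) {
          switch (child.type) {
            case STRING_LITERAL: newComment = child.text; break;  // COMMENT '...'
            case TOK_AFTER_POSITION: flagCol = child.text; break; // AFTER col
            case KW_FIRST: first = true; break;
            case TOK_CASCADE: isCascade = true; break;
            case TOK_RESTRICT: break; // default behavior, nothing to record
            default: throw new IllegalArgumentException("Unsupported token");
          }
        }
      }

      public static void main(String[] args) {
        RenameColDispatch d = new RenameColDispatch();
        d.parseTrailing(Arrays.asList(
            new Child(STRING_LITERAL, "new comment"), new Child(TOK_CASCADE, null)));
        System.out.println(d.newComment + " cascade=" + d.isCascade);
      }
    }

RESTRICT is accepted but records nothing, since it is the default; an unknown trailing token fails fast rather than being silently ignored, matching the patch.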
; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java index 4891518..eda7984 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java @@ -36,7 +36,6 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.FileUtils; -import org.apache.hadoop.hive.common.JavaUtils; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.TableType; import org.apache.hadoop.hive.metastore.Warehouse; @@ -51,7 +50,6 @@ import org.apache.hadoop.hive.ql.hooks.ReadEntity; import org.apache.hadoop.hive.ql.hooks.WriteEntity; import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils; -import org.apache.hadoop.hive.ql.io.HiveOutputFormat; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.InvalidTableException; import org.apache.hadoop.hive.ql.metadata.Table; @@ -63,6 +61,7 @@ import org.apache.hadoop.hive.ql.plan.MoveWork; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.mapred.OutputFormat; /** * ImportSemanticAnalyzer. @@ -462,8 +461,8 @@ private static void checkTable(Table table, CreateTableDesc tableDesc) */ try { Class origin = Class.forName(importedofc, true, Utilities.getSessionSpecifiedClassLoader()); - Class replaced = HiveFileFormatUtils - .getOutputFormatSubstitute(origin,false); + Class replaced = HiveFileFormatUtils + .getOutputFormatSubstitute(origin); if (replaced == null) { throw new SemanticException(ErrorMsg.INVALID_OUTPUT_FORMAT_TYPE .getMsg()); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java index b15a46d..0497e5a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java @@ -20,6 +20,7 @@ import java.util.ArrayList; import java.util.HashMap; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Set; @@ -103,17 +104,19 @@ public QB() { } public QB(String outer_id, String alias, boolean isSubQ) { - aliasToTabs = new HashMap(); - aliasToSubq = new HashMap(); - aliasToProps = new HashMap>(); + // Must be deterministic order maps - see HIVE-8707 + aliasToTabs = new LinkedHashMap(); + aliasToSubq = new LinkedHashMap(); + aliasToProps = new LinkedHashMap>(); aliases = new ArrayList(); if (alias != null) { alias = alias.toLowerCase(); } qbp = new QBParseInfo(alias, isSubQ); qbm = new QBMetaData(); - ptfNodeToSpec = new HashMap(); - destToWindowingSpec = new HashMap(); + // Must be deterministic order maps - see HIVE-8707 + ptfNodeToSpec = new LinkedHashMap(); + destToWindowingSpec = new LinkedHashMap(); id = getAppendedAliasFromId(outer_id, alias); } @@ -320,7 +323,8 @@ public PTFInvocationSpec getPTFInvocationSpec(ASTNode node) { } public void addPTFNodeToSpec(ASTNode node, PTFInvocationSpec spec) { - ptfNodeToSpec = ptfNodeToSpec == null ? new HashMap() : ptfNodeToSpec; + // Must be deterministic order map - see HIVE-8707 + ptfNodeToSpec = ptfNodeToSpec == null ? 
new LinkedHashMap() : ptfNodeToSpec; ptfNodeToSpec.put(node, spec); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/QBMetaData.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/QBMetaData.java index 0c973ce..e8f3284 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/QBMetaData.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/QBMetaData.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.parse; import java.util.HashMap; +import java.util.LinkedHashMap; import java.util.Map; import org.apache.commons.logging.Log; @@ -53,13 +54,15 @@ private static final Log LOG = LogFactory.getLog(QBMetaData.class.getName()); public QBMetaData() { - aliasToTable = new HashMap(); + // Must be deterministic order map - see HIVE-8707 + aliasToTable = new LinkedHashMap(); nameToDestTable = new HashMap(); nameToDestPartition = new HashMap(); nameToDestFile = new HashMap(); nameToDestType = new HashMap(); - aliasToPartSpec = new HashMap>(); - aliasToDPCtx = new HashMap(); + // Must be deterministic order maps - see HIVE-8707 + aliasToPartSpec = new LinkedHashMap>(); + aliasToDPCtx = new LinkedHashMap(); } // All getXXX needs toLowerCase() because they are directly called from diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 016a6d8..95c6d00 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -20,7 +20,6 @@ import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVESTATSDBCLASS; -import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Function; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableList.Builder; @@ -158,7 +157,6 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeNullDesc; import org.apache.hadoop.hive.ql.plan.ExtractDesc; import org.apache.hadoop.hive.ql.plan.FileSinkDesc; import org.apache.hadoop.hive.ql.plan.FilterDesc; @@ -217,6 +215,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.mapred.InputFormat; +import org.apache.hadoop.mapred.OutputFormat; import org.eigenbase.rel.AggregateCall; import org.eigenbase.rel.AggregateRelBase; import org.eigenbase.rel.Aggregation; @@ -401,8 +400,9 @@ public SemanticAnalyzer(HiveConf conf) throws SemanticException { opToPartList = new HashMap(); opToSamplePruner = new HashMap(); nameToSplitSample = new HashMap(); - topOps = new HashMap>(); - topSelOps = new HashMap>(); + // Must be deterministic order maps - see HIVE-8707 + topOps = new LinkedHashMap>(); + topSelOps = new LinkedHashMap>(); loadTableWork = new ArrayList(); loadFileWork = new ArrayList(); opParseCtx = new LinkedHashMap, OpParseContext>(); @@ -1736,7 +1736,8 @@ public void getMetaData(QB qb, ReadEntity parentInput) throws SemanticException } Class outputFormatClass = ts.tableHandle.getOutputFormatClass(); - if (!HiveOutputFormat.class.isAssignableFrom(outputFormatClass)) { + if (!ts.tableHandle.isNonNative() && + !HiveOutputFormat.class.isAssignableFrom(outputFormatClass)) { throw new SemanticException(ErrorMsg.INVALID_OUTPUT_FORMAT_TYPE .getMsg(ast, "The class is " + outputFormatClass.toString())); } @@ -3702,11 +3703,6 @@ private static boolean 
isRegex(String pattern, HiveConf conf) { ArrayList columnNames = new ArrayList(); Map colExprMap = new HashMap(); for (int i = 0; i < col_list.size(); i++) { - // Replace NULL with CAST(NULL AS STRING) - if (col_list.get(i) instanceof ExprNodeNullDesc) { - col_list.set(i, new ExprNodeConstantDesc( - TypeInfoFactory.stringTypeInfo, null)); - } String outputCol = getColumnInternalName(i); colExprMap.put(outputCol, col_list.get(i)); columnNames.add(outputCol); @@ -6502,6 +6498,7 @@ private void checkAcidConstraints(QB qb, TableDesc tableDesc, conf.setIntVar(ConfVars.HIVEOPTREDUCEDEDUPLICATIONMINREDUCER, 1); conf.setBoolVar(ConfVars.HIVE_HADOOP_SUPPORTS_SUBDIRECTORIES, true); conf.set(AcidUtils.CONF_ACID_KEY, "true"); + conf.setBoolVar(ConfVars.HIVEOPTSORTDYNAMICPARTITION, false); if (table.getNumBuckets() < 1) { throw new SemanticException(ErrorMsg.ACID_OP_ON_NONACID_TABLE, table.getTableName()); @@ -8293,7 +8290,7 @@ private void mergeJoins(QB qb, QBJoinTree node, QBJoinTree target, int pos, int[ } } - if ( targetCondn == null ) { + if ( targetCondn == null || (nodeCondn.size() != targetCondn.size())) { return new ObjectPair(-1, null); } @@ -9646,7 +9643,8 @@ public Operator genPlan(QB qb, boolean skipAmbiguityCheck) throws SemanticException { // First generate all the opInfos for the elements in the from clause - Map aliasToOpInfo = new HashMap(); + // Must be deterministic order map - see HIVE-8707 + Map aliasToOpInfo = new LinkedHashMap(); // Recurse over the subqueries to fill the subquery part of the plan for (String alias : qb.getSubqAliases()) { @@ -12455,7 +12453,7 @@ private boolean isAcidTable(Table tab) { return tableIsTransactional != null && tableIsTransactional.equalsIgnoreCase("true"); } - private boolean isAcidOutputFormat(Class of) { + private boolean isAcidOutputFormat(Class of) { Class[] interfaces = of.getInterfaces(); for (Class iface : interfaces) { if (iface.equals(AcidOutputFormat.class)) { @@ -12473,7 +12471,7 @@ private boolean isAcidOutputFormat(Class of) { AcidUtils.Operation.INSERT); } - private AcidUtils.Operation getAcidType(Class of) { + private AcidUtils.Operation getAcidType(Class of) { if (SessionState.get() == null || !SessionState.get().getTxnMgr().supportsAcid()) { return AcidUtils.Operation.NOT_ACID; } else if (isAcidOutputFormat(of)) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java index 5decffb..350a961 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java @@ -690,7 +690,12 @@ public static String getFunctionText(ASTNode expr, boolean isFunction) { static ExprNodeDesc getFuncExprNodeDescWithUdfData(String udfName, TypeInfo typeInfo, ExprNodeDesc... 
children) throws UDFArgumentException { - FunctionInfo fi = FunctionRegistry.getFunctionInfo(udfName); + FunctionInfo fi; + try { + fi = FunctionRegistry.getFunctionInfo(udfName); + } catch (SemanticException e) { + throw new UDFArgumentException(e); + } if (fi == null) { throw new UDFArgumentException(udfName + " not found."); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java index f869821..681d809 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java @@ -112,6 +112,7 @@ Table table; boolean isDropIfExists = false; boolean isTurnOffSorting = false; + boolean isCascade = false; public AlterTableDesc() { } @@ -127,8 +128,8 @@ public AlterTableDesc() { * @param newType */ public AlterTableDesc(String tblName, HashMap partSpec, - String oldColName, String newColName, - String newType, String newComment, boolean first, String afterCol) { + String oldColName, String newColName, String newType, String newComment, + boolean first, String afterCol, boolean isCascade) { super(); oldName = tblName; this.partSpec = partSpec; @@ -139,6 +140,7 @@ public AlterTableDesc(String tblName, HashMap partSpec, this.first = first; this.afterCol = afterCol; op = AlterTableTypes.RENAMECOLUMN; + this.isCascade = isCascade; } /** @@ -161,11 +163,12 @@ public AlterTableDesc(String oldName, String newName, boolean expectView) { * new columns to be added */ public AlterTableDesc(String name, HashMap partSpec, List newCols, - AlterTableTypes alterType) { + AlterTableTypes alterType, boolean isCascade) { op = alterType; oldName = name; this.newCols = new ArrayList(newCols); this.partSpec = partSpec; + this.isCascade = isCascade; } /** @@ -720,6 +723,13 @@ public boolean getIsDropIfExists() { return isDropIfExists; } + /** + * @return isCascade + */ + public boolean getIsCascade() { + return isCascade; + } + public static boolean doesAlterTableTypeSupportPartialPartitionSpec(AlterTableTypes type) { return alterTableTypesWithPartialSpec.contains(type); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/CreateTableDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/CreateTableDesc.java index deba198..8cadb96 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/CreateTableDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/CreateTableDesc.java @@ -26,7 +26,6 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.common.JavaUtils; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.Order; @@ -40,6 +39,7 @@ import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.mapred.OutputFormat; /** * CreateTableDesc. 
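The TypeCheckProcFactory hunk above completes a pattern that runs through this whole patch: FunctionRegistry.getFunctionInfo now throws the checked SemanticException, and each call site chooses between degrading (SqlFunctionConverter logs a warning and continues with null) and translating (here, rethrowing as the UDFArgumentException the method already declares). A condensed sketch of the two strategies, with stand-in exception types in place of the Hive classes:

    // LookupException and ArgumentException stand in for Hive's
    // SemanticException and UDFArgumentException; the registry is faked.
    public class LookupStrategies {
      static class LookupException extends Exception {
        LookupException(String m) { super(m); }
      }
      static class ArgumentException extends Exception {
        ArgumentException(Throwable cause) { super(cause); }
      }

      static String getFunctionInfo(String name) throws LookupException {
        throw new LookupException("Failed to load udf " + name);
      }

      // Strategy 1 (SqlFunctionConverter): warn and fall back to null.
      static String tryResolve(String name) {
        try {
          return getFunctionInfo(name);
        } catch (LookupException e) {
          System.err.println("Failed to load udf " + name + ": " + e.getMessage());
          return null;
        }
      }

      // Strategy 2 (TypeCheckProcFactory): translate into the exception type
      // the caller's contract already declares, preserving the cause.
      static String resolveOrThrow(String name) throws ArgumentException {
        try {
          return getFunctionInfo(name);
        } catch (LookupException e) {
          throw new ArgumentException(e);
        }
      }

      public static void main(String[] args) {
        System.out.println(tryResolve("nosuch")); // prints a warning, then null
      }
    }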
@@ -420,14 +420,14 @@ public void validate(HiveConf conf) try { Class origin = Class.forName(this.getOutputFormat(), true, Utilities.getSessionSpecifiedClassLoader()); - Class replaced = HiveFileFormatUtils - .getOutputFormatSubstitute(origin,false); - if (replaced == null) { + Class replaced = HiveFileFormatUtils + .getOutputFormatSubstitute(origin); + if (!HiveOutputFormat.class.isAssignableFrom(replaced)) { throw new SemanticException(ErrorMsg.INVALID_OUTPUT_FORMAT_TYPE .getMsg()); } } catch (ClassNotFoundException e) { - throw new SemanticException(ErrorMsg.INVALID_OUTPUT_FORMAT_TYPE.getMsg()); + throw new SemanticException(ErrorMsg.GENERIC_ERROR.getMsg(), e); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java index 10c38d9..b77add4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java @@ -38,6 +38,7 @@ import org.apache.hadoop.hive.serde2.Deserializer; import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.mapred.InputFormat; +import org.apache.hadoop.mapred.OutputFormat; import org.apache.hadoop.util.ReflectionUtils; /** @@ -58,7 +59,7 @@ private TableDesc tableDesc; private LinkedHashMap partSpec; private Class inputFileFormatClass; - private Class outputFileFormatClass; + private Class outputFileFormatClass; private Properties properties; private String baseFileName; @@ -148,7 +149,7 @@ public void setInputFileFormatClass( } } - public Class getOutputFileFormatClass() { + public Class getOutputFileFormatClass() { if (outputFileFormatClass == null && tableDesc != null) { setOutputFileFormatClass(tableDesc.getOutputFileFormatClass()); } @@ -156,8 +157,8 @@ public void setInputFileFormatClass( } public void setOutputFileFormatClass(final Class outputFileFormatClass) { - Class outputClass = outputFileFormatClass == null ? null : - HiveFileFormatUtils.getOutputFormatSubstitute(outputFileFormatClass,false); + Class outputClass = outputFileFormatClass == null ? 
null : + HiveFileFormatUtils.getOutputFormatSubstitute(outputFileFormatClass); if (outputClass != null) { this.outputFileFormatClass = (Class) CLASS_INTERNER.intern(outputClass); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java index fdc1f62..ae99bd3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java @@ -42,9 +42,7 @@ import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.hooks.ReadEntity; import org.apache.hadoop.hive.ql.io.AcidUtils; -import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils; import org.apache.hadoop.hive.ql.io.HiveOutputFormat; -import org.apache.hadoop.hive.ql.io.HivePassThroughOutputFormat; import org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat; import org.apache.hadoop.hive.ql.io.RCFileInputFormat; import org.apache.hadoop.hive.ql.io.RCFileOutputFormat; @@ -827,13 +825,6 @@ private static void configureJobPropertiesForStorageHandler(boolean input, "using configureTableJobProperties",e); storageHandler.configureTableJobProperties(tableDesc, jobProperties); } - if (tableDesc.getOutputFileFormatClass().getName() - == HivePassThroughOutputFormat.HIVE_PASSTHROUGH_OF_CLASSNAME) { - // get the real output format when we register this for the table - jobProperties.put( - HivePassThroughOutputFormat.HIVE_PASSTHROUGH_STORAGEHANDLER_OF_JOBCONFKEY, - HiveFileFormatUtils.getRealOutputFormatClassName()); - } } // Job properties are only relevant for non-native tables, so // for native tables, leave it null to avoid cluttering up diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java index 408ddf5..374e8b6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java @@ -28,12 +28,11 @@ import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils; -import org.apache.hadoop.hive.ql.io.HiveOutputFormat; -import org.apache.hadoop.hive.ql.io.HivePassThroughOutputFormat; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.Deserializer; import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.mapred.InputFormat; +import org.apache.hadoop.mapred.OutputFormat; /** * TableDesc. 
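The PartitionDesc and PlanUtils hunks above, and the TableDesc hunk below, all converge on one simplification: getOutputFormatSubstitute loses its boolean flag and the HivePassThroughOutputFormat special case disappears, so a format class either has a registered Hive-aware substitute or is used as-is and validated by the caller. A toy model of the single-argument lookup; the map contents are illustrative, not Hive's real registrations:

    import java.util.HashMap;
    import java.util.Map;

    public class OutputFormatSubstitutes {
      private static final Map<String, String> SUBSTITUTES =
          new HashMap<String, String>();
      static {
        SUBSTITUTES.put("IgnoreKeyTextOutputFormat", "HiveIgnoreKeyTextOutputFormat");
        SUBSTITUTES.put("SequenceFileOutputFormat", "HiveSequenceFileOutputFormat");
      }

      // One argument, no pass-through flag: return the substitute when one is
      // registered, otherwise hand the original back unchanged.
      public static String getOutputFormatSubstitute(String origin) {
        String replaced = SUBSTITUTES.get(origin);
        return replaced != null ? replaced : origin;
      }

      public static void main(String[] args) {
        System.out.println(getOutputFormatSubstitute("SequenceFileOutputFormat"));
        System.out.println(getOutputFormatSubstitute("SomeStorageHandlerFormat"));
      }
    }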
@@ -42,7 +41,7 @@ public class TableDesc implements Serializable, Cloneable { private static final long serialVersionUID = 1L; private Class inputFileFormatClass; - private Class outputFileFormatClass; + private Class outputFileFormatClass; private java.util.Properties properties; private Map jobProperties; @@ -59,7 +58,7 @@ public TableDesc( final Class outputFormatClass, final Properties properties) { this.inputFileFormatClass = inputFormatClass; outputFileFormatClass = HiveFileFormatUtils - .getOutputFormatSubstitute(outputFormatClass, false); + .getOutputFormatSubstitute(outputFormatClass); this.properties = properties; } @@ -94,13 +93,13 @@ public void setInputFileFormatClass( this.inputFileFormatClass = inputFileFormatClass; } - public Class getOutputFileFormatClass() { + public Class getOutputFileFormatClass() { return outputFileFormatClass; } - public void setOutputFileFormatClass(final Class outputFileFormatClass) { + public void setOutputFileFormatClass(Class outputFileFormatClass) { this.outputFileFormatClass = HiveFileFormatUtils - .getOutputFormatSubstitute(outputFileFormatClass, false); + .getOutputFormatSubstitute(outputFileFormatClass); } @Explain(displayName = "properties", normalExplain = false) @@ -142,12 +141,7 @@ public String getInputFileFormatClassName() { @Explain(displayName = "output format") public String getOutputFileFormatClassName() { - if (getOutputFileFormatClass().getName() == HivePassThroughOutputFormat.HIVE_PASSTHROUGH_OF_CLASSNAME) { - return HiveFileFormatUtils.getRealOutputFormatClassName(); - } - else { - return getOutputFileFormatClass().getName(); - } + return getOutputFileFormatClass().getName(); } public boolean isNonNative() { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/HadoopDefaultAuthenticator.java b/ql/src/java/org/apache/hadoop/hive/ql/security/HadoopDefaultAuthenticator.java index d68d19d..98ca79b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/security/HadoopDefaultAuthenticator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/security/HadoopDefaultAuthenticator.java @@ -25,6 +25,7 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.security.UserGroupInformation; public class HadoopDefaultAuthenticator implements HiveAuthenticationProvider { @@ -49,7 +50,7 @@ public void setConf(Configuration conf) { this.conf = conf; UserGroupInformation ugi = null; try { - ugi = ShimLoader.getHadoopShims().getUGIForConf(conf); + ugi = Utils.getUGIForConf(conf); } catch (Exception e) { throw new RuntimeException(e); } @@ -59,7 +60,7 @@ public void setConf(Configuration conf) { "Can not initialize HadoopDefaultAuthenticator."); } - this.userName = ShimLoader.getHadoopShims().getShortUserName(ugi); + this.userName = ugi.getShortUserName(); if (ugi.getGroupNames() != null) { this.groupNames = Arrays.asList(ugi.getGroupNames()); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/ProxyUserAuthenticator.java b/ql/src/java/org/apache/hadoop/hive/ql/security/ProxyUserAuthenticator.java index 95a98fe..c83c649 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/security/ProxyUserAuthenticator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/security/ProxyUserAuthenticator.java @@ -51,7 +51,7 @@ public void setConf(Configuration conf) { // If we're here, proxy user is set. 
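HadoopDefaultAuthenticator above, and ProxyUserAuthenticator just below, drop two shim round trips: the UGI comes from the lighter shims Utils helper, and the short name is read directly off UserGroupInformation, long since stable Hadoop API. A minimal sketch of the resulting shape, using the real hadoop-common UserGroupInformation but with getCurrentUser() standing in for Utils.getUGIForConf(conf), which additionally consults the Configuration:

    import java.util.Arrays;
    import java.util.List;
    import org.apache.hadoop.security.UserGroupInformation;

    public class SimpleAuthenticator {
      private String userName;
      private List<String> groupNames;

      public void setUp() {
        try {
          // Stand-in for Utils.getUGIForConf(conf) in the patch.
          UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
          this.userName = ugi.getShortUserName(); // "hive", not "hive/host@REALM"
          if (ugi.getGroupNames() != null) {
            this.groupNames = Arrays.asList(ugi.getGroupNames());
          }
        } catch (Exception e) {
          throw new RuntimeException("Can not initialize authenticator.", e);
        }
      }

      public String getUserName() { return userName; }
      public List<String> getGroupNames() { return groupNames; }
    }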
try { - ugi = ShimLoader.getHadoopShims().createRemoteUser(proxyUser,null); + ugi = UserGroupInformation.createRemoteUser(proxyUser); } catch (Exception e) { throw new RuntimeException(e); } @@ -61,7 +61,7 @@ public void setConf(Configuration conf) { "Can not initialize ProxyUserAuthenticator for user ["+proxyUser+"]"); } - this.userName = ShimLoader.getHadoopShims().getShortUserName(ugi); + this.userName = ugi.getShortUserName(); if (ugi.getGroupNames() != null) { this.groupNames = Arrays.asList(ugi.getGroupNames()); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/SettableConfigUpdater.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/SettableConfigUpdater.java index 3de9782..34fb487 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/SettableConfigUpdater.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/SettableConfigUpdater.java @@ -44,7 +44,7 @@ public static void setHiveConfWhiteList(HiveConf hiveConf) throws HiveAuthzPlugi String whiteListParamsStr = hiveConf .getVar(ConfVars.HIVE_AUTHORIZATION_SQL_STD_AUTH_CONFIG_WHITELIST); - if(whiteListParamsStr == null && whiteListParamsStr.trim().isEmpty()) { + if(whiteListParamsStr == null || whiteListParamsStr.trim().isEmpty()) { throw new HiveAuthzPluginException("Configuration parameter " + ConfVars.HIVE_AUTHORIZATION_SQL_STD_AUTH_CONFIG_WHITELIST.varname + " is not iniatialized."); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java index 049ebc9..4caa488 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java @@ -67,6 +67,7 @@ import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveMetastoreClientFactoryImpl; import org.apache.hadoop.hive.ql.util.DosToUnix; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.ReflectionUtils; @@ -237,7 +238,7 @@ * Whether we are in auto-commit state or not. Currently we are always in auto-commit, * so there are not setters for this yet. 
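The SettableConfigUpdater hunk above is a one-character fix worth spelling out: with &&, a null whiteListParamsStr is dereferenced by the second operand (NullPointerException), and a non-null value short-circuits to false, so the guard could never fire as intended; || expresses the actual null-or-blank check. In miniature:

    // The && version either throws NullPointerException (null input) or is
    // trivially false (non-null input); || is the intended null-or-blank guard.
    public class NullOrBlankGuard {
      static boolean isMissingOrBlank(String value) {
        return value == null || value.trim().isEmpty();
      }

      public static void main(String[] args) {
        System.out.println(isMissingOrBlank(null));    // true, and no NPE
        System.out.println(isMissingOrBlank("   "));   // true
        System.out.println(isMissingOrBlank("a,b,c")); // false
      }
    }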
*/ - private boolean txnAutoCommit = true; + private final boolean txnAutoCommit = true; /** * store the jars loaded last time @@ -427,7 +428,7 @@ public static SessionState start(SessionState startSs) { // shared with SessionState, other parts of the code might update the config, but // Hive.get(HiveConf) would not recognize the case when it needs refreshing Hive.get(new HiveConf(startSs.conf)).getMSC(); - UserGroupInformation sessionUGI = ShimLoader.getHadoopShims().getUGIForConf(startSs.conf); + UserGroupInformation sessionUGI = Utils.getUGIForConf(startSs.conf); FileSystem.get(startSs.conf); // Create scratch dirs for this session @@ -655,7 +656,7 @@ private void setupAuth() { authorizerV2 = authorizerFactory.createHiveAuthorizer(new HiveMetastoreClientFactoryImpl(), conf, authenticator, authzContextBuilder.build()); - authorizerV2.applyAuthorizationConfigPolicy(conf); + authorizerV2.applyAuthorizationConfigPolicy(conf); } // create the create table grants with new config createTableGrants = CreateTableAutomaticGrant.create(conf); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java index 1d3cf00..3cdbc6c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java @@ -37,6 +37,7 @@ import org.apache.hadoop.hive.ql.io.RecordIdentifier; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.WritableComparable; @@ -697,9 +698,10 @@ public void abortTask(TaskAttemptContext taskAttemptContext) throws IOException @Override public void commitJob(JobContext context) throws IOException { - Path tmpLocation = new Path(context.getJobConf().get(TMP_LOCATION)); - Path finalLocation = new Path(context.getJobConf().get(FINAL_LOCATION)); - FileSystem fs = tmpLocation.getFileSystem(context.getJobConf()); + JobConf conf = ShimLoader.getHadoopShims().getJobConf(context); + Path tmpLocation = new Path(conf.get(TMP_LOCATION)); + Path finalLocation = new Path(conf.get(FINAL_LOCATION)); + FileSystem fs = tmpLocation.getFileSystem(conf); LOG.debug("Moving contents of " + tmpLocation.toString() + " to " + finalLocation.toString()); @@ -713,8 +715,9 @@ public void commitJob(JobContext context) throws IOException { @Override public void abortJob(JobContext context, int status) throws IOException { - Path tmpLocation = new Path(context.getJobConf().get(TMP_LOCATION)); - FileSystem fs = tmpLocation.getFileSystem(context.getJobConf()); + JobConf conf = ShimLoader.getHadoopShims().getJobConf(context); + Path tmpLocation = new Path(conf.get(TMP_LOCATION)); + FileSystem fs = tmpLocation.getFileSystem(conf); LOG.debug("Removing " + tmpLocation.toString()); fs.delete(tmpLocation, true); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFArray.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFArray.java index c429367..44eb44c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFArray.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFArray.java @@ -28,7 +28,6 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; 
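In the CompactorMR hunks above, the committer callbacks stop calling context.getJobConf() directly and fetch the JobConf through the shim layer, since the JobContext API differs across the Hadoop versions Hive supports; the version-specific access now lives in one place. A stripped-down illustration of the indirection; the types below are stand-ins for ShimLoader and HadoopShims, not the real classes:

    public class ShimIndirection {
      static class JobConf {
        String get(String key) { return "/warehouse/_tmp_compactor"; }
      }
      static class JobContext {
        private final JobConf conf = new JobConf();
        JobConf getJobConf() { return conf; } // shape varies across versions
      }

      interface HadoopShims {
        JobConf getJobConf(JobContext context);
      }

      static HadoopShims getHadoopShims() {
        // In Hive this is resolved once per Hadoop version; here it is fixed.
        return new HadoopShims() {
          public JobConf getJobConf(JobContext context) {
            return context.getJobConf();
          }
        };
      }

      public static void main(String[] args) {
        JobConf conf = getHadoopShims().getJobConf(new JobContext());
        System.out.println(conf.get("hive.compactor.input.tmp.dir"));
      }
    }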
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; /** @@ -59,11 +58,8 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen converters = new Converter[arguments.length]; - ObjectInspector returnOI = returnOIResolver.get(); - if (returnOI == null) { - returnOI = PrimitiveObjectInspectorFactory - .getPrimitiveJavaObjectInspector(PrimitiveObjectInspector.PrimitiveCategory.STRING); - } + ObjectInspector returnOI = + returnOIResolver.get(PrimitiveObjectInspectorFactory.javaStringObjectInspector); for (int i = 0; i < arguments.length; i++) { converters[i] = ObjectInspectorConverters.getConverter(arguments[i], returnOI); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMap.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMap.java index 0312306..773e1ca 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMap.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMap.java @@ -82,17 +82,10 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen } } - ObjectInspector keyOI = keyOIResolver.get(); - ObjectInspector valueOI = valueOIResolver.get(); - - if (keyOI == null) { - keyOI = PrimitiveObjectInspectorFactory - .getPrimitiveJavaObjectInspector(PrimitiveObjectInspector.PrimitiveCategory.STRING); - } - if (valueOI == null) { - valueOI = PrimitiveObjectInspectorFactory - .getPrimitiveJavaObjectInspector(PrimitiveObjectInspector.PrimitiveCategory.STRING); - } + ObjectInspector keyOI = + keyOIResolver.get(PrimitiveObjectInspectorFactory.javaStringObjectInspector); + ObjectInspector valueOI = + valueOIResolver.get(PrimitiveObjectInspectorFactory.javaStringObjectInspector); converters = new Converter[arguments.length]; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSortArray.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSortArray.java index a43867c..40d1bdd 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSortArray.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSortArray.java @@ -82,10 +82,7 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen ((ListObjectInspector)(arguments[0])).getListElementObjectInspector(); argumentOIs = arguments; converters = new Converter[arguments.length]; - ObjectInspector returnOI = returnOIResolver.get(); - if (returnOI == null) { - returnOI = elementObjectInspector; - } + ObjectInspector returnOI = returnOIResolver.get(elementObjectInspector); converters[0] = ObjectInspectorConverters.getConverter(elementObjectInspector, returnOI); return ObjectInspectorFactory.getStandardListObjectInspector(returnOI); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUtils.java index 833452d..f197afa 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUtils.java @@ -22,7 +22,6 @@ import java.lang.reflect.Method; import java.lang.reflect.ParameterizedType; import java.lang.reflect.Type; -import java.nio.ByteBuffer; import java.util.HashMap; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; @@ -41,6 +40,7 @@ import 
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.VoidObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; @@ -191,7 +191,11 @@ private boolean update(ObjectInspector oi, boolean isUnionAll) throws UDFArgumen * Returns the ObjectInspector of the return value. */ public ObjectInspector get() { - return returnObjectInspector; + return get(PrimitiveObjectInspectorFactory.javaVoidObjectInspector); + } + + public ObjectInspector get(ObjectInspector defaultOI) { + return returnObjectInspector != null ? returnObjectInspector : defaultOI; } /** diff --git a/ql/src/test/org/apache/hadoop/hive/metastore/TestMetastoreExpr.java b/ql/src/test/org/apache/hadoop/hive/metastore/TestMetastoreExpr.java index 93981fa..c0fd4b3 100644 --- a/ql/src/test/org/apache/hadoop/hive/metastore/TestMetastoreExpr.java +++ b/ql/src/test/org/apache/hadoop/hive/metastore/TestMetastoreExpr.java @@ -189,11 +189,11 @@ public ExprNodeGenericFuncDesc build() throws Exception { return (ExprNodeGenericFuncDesc)stack.pop(); } - public ExprBuilder pred(String name, int args) { + public ExprBuilder pred(String name, int args) throws Exception { return fn(name, TypeInfoFactory.booleanTypeInfo, args); } - private ExprBuilder fn(String name, TypeInfo ti, int args) { + private ExprBuilder fn(String name, TypeInfo ti, int args) throws Exception { List children = new ArrayList(); for (int i = 0; i < args; ++i) { children.add(stack.pop()); diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java index 0eb7c5a..41862e6 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java @@ -181,7 +181,7 @@ private static void fileDiff(String datafile, String testdir) throws Exception { } } - private FilterDesc getTestFilterDesc(String column) { + private FilterDesc getTestFilterDesc(String column) throws Exception { ArrayList children1 = new ArrayList(); children1.add(new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, column, "", false)); @@ -208,7 +208,7 @@ private FilterDesc getTestFilterDesc(String column) { } @SuppressWarnings("unchecked") - private void populateMapPlan1(Table src) { + private void populateMapPlan1(Table src) throws Exception { Operator op2 = OperatorFactory.get(new FileSinkDesc(new Path(tmpdir + File.separator + "mapplan1.out"), Utilities.defaultTd, true)); @@ -219,7 +219,7 @@ private void populateMapPlan1(Table src) { } @SuppressWarnings("unchecked") - private void populateMapPlan2(Table src) { + private void populateMapPlan2(Table src) throws Exception { Operator op3 = OperatorFactory.get(new FileSinkDesc(new Path(tmpdir + File.separator + "mapplan2.out"), Utilities.defaultTd, false)); @@ -267,7 +267,7 @@ private void populateMapRedPlan1(Table src) throws SemanticException { } @SuppressWarnings("unchecked") - private void populateMapRedPlan2(Table src) throws SemanticException { + private void populateMapRedPlan2(Table src) throws Exception { ArrayList outputColumns = new ArrayList(); for (int i 
= 0; i < 2; i++) { outputColumns.add("_col" + i); @@ -423,7 +423,7 @@ private void populateMapRedPlan5(Table src) throws SemanticException { } @SuppressWarnings("unchecked") - private void populateMapRedPlan6(Table src) throws SemanticException { + private void populateMapRedPlan6(Table src) throws Exception { // map-side work ArrayList outputColumns = new ArrayList(); diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExpressionEvaluator.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExpressionEvaluator.java index 1bb6eaf..6db3c19 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExpressionEvaluator.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExpressionEvaluator.java @@ -122,12 +122,12 @@ public void testExprNodeColumnEvaluator() throws Throwable { } } - private static ExprNodeDesc getListIndexNode(ExprNodeDesc node, int index) { + private static ExprNodeDesc getListIndexNode(ExprNodeDesc node, int index) throws Exception { return getListIndexNode(node, new ExprNodeConstantDesc(index)); } private static ExprNodeDesc getListIndexNode(ExprNodeDesc node, - ExprNodeDesc index) { + ExprNodeDesc index) throws Exception { ArrayList children = new ArrayList(2); children.add(node); children.add(index); diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java index 655c3d0..068bdee 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; @@ -379,7 +380,7 @@ public void testGetTypeInfoForPrimitiveCategory() { protected void tearDown() { } - public void testIsRankingFunction() { + public void testIsRankingFunction() throws Exception { Assert.assertTrue(FunctionRegistry.isRankingFunction("rank")); Assert.assertTrue(FunctionRegistry.isRankingFunction("dense_rank")); Assert.assertTrue(FunctionRegistry.isRankingFunction("percent_rank")); @@ -387,7 +388,7 @@ public void testIsRankingFunction() { Assert.assertFalse(FunctionRegistry.isRankingFunction("min")); } - public void testImpliesOrder() { + public void testImpliesOrder() throws Exception { Assert.assertTrue(FunctionRegistry.impliesOrder("rank")); Assert.assertTrue(FunctionRegistry.impliesOrder("dense_rank")); Assert.assertTrue(FunctionRegistry.impliesOrder("percent_rank")); diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/SampleTezSessionState.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/SampleTezSessionState.java index dc9315f..ac861c6 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/SampleTezSessionState.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/SampleTezSessionState.java @@ -24,7 +24,7 @@ import javax.security.auth.login.LoginException; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.security.UserGroupInformation; import org.apache.tez.dag.api.TezException; @@ -60,9 +60,8 @@ public void setOpen(boolean open) { public void open(HiveConf conf) throws IOException, LoginException, URISyntaxException, TezException { 
this.hiveConf = conf; - UserGroupInformation ugi; - ugi = ShimLoader.getHadoopShims().getUGIForConf(conf); - user = ShimLoader.getHadoopShims().getShortUserName(ugi); + UserGroupInformation ugi = Utils.getUGIForConf(conf); + user = ugi.getShortUserName(); this.doAsEnabled = conf.getBoolVar(HiveConf.ConfVars.HIVE_SERVER2_ENABLE_DOAS); } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/AbstractTestParquetDirect.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/AbstractTestParquetDirect.java new file mode 100644 index 0000000..3a47673 --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/AbstractTestParquetDirect.java @@ -0,0 +1,154 @@ +package org.apache.hadoop.hive.ql.io.parquet; + +import com.google.common.base.Joiner; +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.io.ArrayWritable; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.mapred.FileSplit; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.RecordReader; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Rule; +import org.junit.rules.TemporaryFolder; +import parquet.hadoop.ParquetWriter; +import parquet.hadoop.api.WriteSupport; +import parquet.io.api.RecordConsumer; +import parquet.schema.MessageType; + +public abstract class AbstractTestParquetDirect { + + public static FileSystem localFS = null; + + @BeforeClass + public static void initializeFS() throws IOException { + localFS = FileSystem.getLocal(new Configuration()); + } + + @Rule + public final TemporaryFolder tempDir = new TemporaryFolder(); + + + public interface DirectWriter { + public void write(RecordConsumer consumer); + } + + public static class DirectWriteSupport extends WriteSupport { + private RecordConsumer recordConsumer; + private final MessageType type; + private final DirectWriter writer; + private final Map metadata; + + private DirectWriteSupport(MessageType type, DirectWriter writer, + Map metadata) { + this.type = type; + this.writer = writer; + this.metadata = metadata; + } + + @Override + public WriteContext init(Configuration configuration) { + return new WriteContext(type, metadata); + } + + @Override + public void prepareForWrite(RecordConsumer recordConsumer) { + this.recordConsumer = recordConsumer; + } + + @Override + public void write(Void record) { + writer.write(recordConsumer); + } + } + + public Path writeDirect(String name, MessageType type, DirectWriter writer) + throws IOException { + File temp = tempDir.newFile(name + ".parquet"); + temp.deleteOnExit(); + temp.delete(); + + Path path = new Path(temp.getPath()); + + ParquetWriter parquetWriter = new ParquetWriter(path, + new DirectWriteSupport(type, writer, new HashMap())); + parquetWriter.write(null); + parquetWriter.close(); + + return path; + } + + public static ArrayWritable record(Writable... fields) { + return new ArrayWritable(Writable.class, fields); + } + + public static ArrayWritable list(Writable... 
elements) { + // the ObjectInspector for array and map expects an extra layer + return new ArrayWritable(ArrayWritable.class, new ArrayWritable[] { + new ArrayWritable(Writable.class, elements) + }); + } + + public static String toString(ArrayWritable arrayWritable) { + Writable[] writables = arrayWritable.get(); + String[] strings = new String[writables.length]; + for (int i = 0; i < writables.length; i += 1) { + if (writables[i] instanceof ArrayWritable) { + strings[i] = toString((ArrayWritable) writables[i]); + } else { + strings[i] = String.valueOf(writables[i]); + } + } + return Arrays.toString(strings); + } + + public static void assertEquals(String message, ArrayWritable expected, + ArrayWritable actual) { + Assert.assertEquals(message, toString(expected), toString(actual)); + } + + public static List read(Path parquetFile) throws IOException { + List records = new ArrayList(); + + RecordReader reader = new MapredParquetInputFormat(). + getRecordReader(new FileSplit( + parquetFile, 0, fileLength(parquetFile), (String[]) null), + new JobConf(), null); + + Void alwaysNull = reader.createKey(); + ArrayWritable record = reader.createValue(); + while (reader.next(alwaysNull, record)) { + records.add(record); + record = reader.createValue(); // a new value so the last isn't clobbered + } + + return records; + } + + public static long fileLength(Path localFile) throws IOException { + return localFS.getFileStatus(localFile).getLen(); + } + + private static final Joiner COMMA = Joiner.on(","); + public void deserialize(Writable record, List columnNames, + List columnTypes) throws Exception { + ParquetHiveSerDe serde = new ParquetHiveSerDe(); + Properties props = new Properties(); + props.setProperty(serdeConstants.LIST_COLUMNS, COMMA.join(columnNames)); + props.setProperty(serdeConstants.LIST_COLUMN_TYPES, COMMA.join(columnTypes)); + serde.initialize(null, props); + serde.deserialize(record); + } +} diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestArrayCompatibility.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestArrayCompatibility.java new file mode 100644 index 0000000..f7f3e57 --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestArrayCompatibility.java @@ -0,0 +1,614 @@ +package org.apache.hadoop.hive.ql.io.parquet; + +import java.util.List; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.io.ArrayWritable; +import org.apache.hadoop.io.FloatWritable; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.LongWritable; +import org.junit.Assert; +import org.junit.Test; +import parquet.io.api.RecordConsumer; +import parquet.schema.MessageType; +import parquet.schema.Types; + +import static parquet.schema.OriginalType.LIST; +import static parquet.schema.PrimitiveType.PrimitiveTypeName.DOUBLE; +import static parquet.schema.PrimitiveType.PrimitiveTypeName.FLOAT; +import static parquet.schema.PrimitiveType.PrimitiveTypeName.INT32; +import static parquet.schema.PrimitiveType.PrimitiveTypeName.INT64; + +public class TestArrayCompatibility extends AbstractTestParquetDirect { + + @Test + public void testUnannotatedListOfPrimitives() throws Exception { + MessageType fileSchema = Types.buildMessage() + .repeated(INT32).named("list_of_ints") + .named("UnannotatedListOfPrimitives"); + + Path test = writeDirect("UnannotatedListOfPrimitives", + fileSchema, + new DirectWriter() { + @Override + public void write(RecordConsumer rc) { + rc.startMessage(); + 
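// RecordConsumer events must nest strictly: startMessage/endMessage
+          // bracket the record, startField/endField bracket each field, and a
+          // repeated field's values are written back-to-back inside one field.
+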
rc.startField("list_of_ints", 0); + + rc.addInteger(34); + rc.addInteger(35); + rc.addInteger(36); + + rc.endField("list_of_ints", 0); + rc.endMessage(); + } + }); + + ArrayWritable expected = record(list( + new IntWritable(34), new IntWritable(35), new IntWritable(36))); + + List records = read(test); + Assert.assertEquals("Should have only one record", 1, records.size()); + assertEquals("Should match expected record", + expected, records.get(0)); + } + + @Test + public void testUnannotatedListOfGroups() throws Exception { + Path test = writeDirect("UnannotatedListOfGroups", + Types.buildMessage() + .repeatedGroup() + .required(FLOAT).named("x") + .required(FLOAT).named("y") + .named("list_of_points") + .named("UnannotatedListOfGroups"), + new DirectWriter() { + @Override + public void write(RecordConsumer rc) { + rc.startMessage(); + rc.startField("list_of_points", 0); + + rc.startGroup(); + rc.startField("x", 0); + rc.addFloat(1.0f); + rc.endField("x", 0); + rc.startField("y", 1); + rc.addFloat(1.0f); + rc.endField("y", 1); + rc.endGroup(); + + rc.startGroup(); + rc.startField("x", 0); + rc.addFloat(2.0f); + rc.endField("x", 0); + rc.startField("y", 1); + rc.addFloat(2.0f); + rc.endField("y", 1); + rc.endGroup(); + + rc.endField("list_of_points", 0); + rc.endMessage(); + } + }); + + ArrayWritable expected = record(list( + record(new FloatWritable(1.0f), new FloatWritable(1.0f)), + record(new FloatWritable(2.0f), new FloatWritable(2.0f)))); + + List records = read(test); + Assert.assertEquals("Should have only one record", 1, records.size()); + assertEquals("Should match expected record", + expected, records.get(0)); + } + + @Test + public void testThriftPrimitiveInList() throws Exception { + Path test = writeDirect("ThriftPrimitiveInList", + Types.buildMessage() + .requiredGroup().as(LIST) + .repeated(INT32).named("list_of_ints_tuple") + .named("list_of_ints") + .named("ThriftPrimitiveInList"), + new DirectWriter() { + @Override + public void write(RecordConsumer rc) { + rc.startMessage(); + rc.startField("list_of_ints", 0); + + rc.startGroup(); + rc.startField("list_of_ints_tuple", 0); + + rc.addInteger(34); + rc.addInteger(35); + rc.addInteger(36); + + rc.endField("list_of_ints_tuple", 0); + rc.endGroup(); + + rc.endField("list_of_ints", 0); + rc.endMessage(); + } + }); + + ArrayWritable expected = record(list( + new IntWritable(34), new IntWritable(35), new IntWritable(36))); + + List records = read(test); + Assert.assertEquals("Should have only one record", 1, records.size()); + assertEquals("Should match expected record", expected, records.get(0)); + } + + @Test + public void testThriftSingleFieldGroupInList() throws Exception { + // this tests the case where older data has an ambiguous structure, but the + // correct interpretation can be determined from the repeated name + + Path test = writeDirect("ThriftSingleFieldGroupInList", + Types.buildMessage() + .optionalGroup().as(LIST) + .repeatedGroup() + .required(INT64).named("count") + .named("single_element_groups_tuple") + .named("single_element_groups") + .named("ThriftSingleFieldGroupInList"), + new DirectWriter() { + @Override + public void write(RecordConsumer rc) { + rc.startMessage(); + rc.startField("single_element_groups", 0); + + rc.startGroup(); + rc.startField("single_element_groups_tuple", 0); // start writing array contents + + rc.startGroup(); + rc.startField("count", 0); + rc.addLong(1234L); + rc.endField("count", 0); + rc.endGroup(); + + rc.startGroup(); + rc.startField("count", 0); + rc.addLong(2345L); + 
rc.endField("count", 0); + rc.endGroup(); + + rc.endField("single_element_groups_tuple", 0); // finished writing array contents + rc.endGroup(); + + rc.endField("single_element_groups", 0); + rc.endMessage(); + } + }); + + ArrayWritable expected = record(list( + record(new LongWritable(1234L)), + record(new LongWritable(2345L)))); + + List records = read(test); + Assert.assertEquals("Should have only one record", 1, records.size()); + assertEquals("Should match expected record", + expected, records.get(0)); + } + + @Test + public void testAvroPrimitiveInList() throws Exception { + Path test = writeDirect("AvroPrimitiveInList", + Types.buildMessage() + .requiredGroup().as(LIST) + .repeated(INT32).named("array") + .named("list_of_ints") + .named("AvroPrimitiveInList"), + new DirectWriter() { + @Override + public void write(RecordConsumer rc) { + rc.startMessage(); + rc.startField("list_of_ints", 0); + + rc.startGroup(); + rc.startField("array", 0); + + rc.addInteger(34); + rc.addInteger(35); + rc.addInteger(36); + + rc.endField("array", 0); + rc.endGroup(); + + rc.endField("list_of_ints", 0); + rc.endMessage(); + } + }); + + ArrayWritable expected = record(list( + new IntWritable(34), new IntWritable(35), new IntWritable(36))); + + List records = read(test); + Assert.assertEquals("Should have only one record", 1, records.size()); + assertEquals("Should match expected record", expected, records.get(0)); + } + + @Test + public void testAvroSingleFieldGroupInList() throws Exception { + // this tests the case where older data has an ambiguous structure, but the + // correct interpretation can be determined from the repeated name, "array" + + Path test = writeDirect("AvroSingleFieldGroupInList", + Types.buildMessage() + .optionalGroup().as(LIST) + .repeatedGroup() + .required(INT64).named("count") + .named("array") + .named("single_element_groups") + .named("AvroSingleFieldGroupInList"), + new DirectWriter() { + @Override + public void write(RecordConsumer rc) { + rc.startMessage(); + rc.startField("single_element_groups", 0); + + rc.startGroup(); + rc.startField("array", 0); // start writing array contents + + rc.startGroup(); + rc.startField("count", 0); + rc.addLong(1234L); + rc.endField("count", 0); + rc.endGroup(); + + rc.startGroup(); + rc.startField("count", 0); + rc.addLong(2345L); + rc.endField("count", 0); + rc.endGroup(); + + rc.endField("array", 0); // finished writing array contents + rc.endGroup(); + + rc.endField("single_element_groups", 0); + rc.endMessage(); + } + }); + + ArrayWritable expected = record(list( + record(new LongWritable(1234L)), + record(new LongWritable(2345L)))); + + List records = read(test); + Assert.assertEquals("Should have only one record", 1, records.size()); + assertEquals("Should match expected record", + expected, records.get(0)); + } + + @Test + public void testAmbiguousSingleFieldGroupInList() throws Exception { + // this tests the case where older data has an ambiguous list and is not + // named indicating that the source considered the group significant + + Path test = writeDirect("SingleFieldGroupInList", + Types.buildMessage() + .optionalGroup().as(LIST) + .repeatedGroup() + .required(INT64).named("count") + .named("single_element_group") + .named("single_element_groups") + .named("SingleFieldGroupInList"), + new DirectWriter() { + @Override + public void write(RecordConsumer rc) { + rc.startMessage(); + rc.startField("single_element_groups", 0); + + rc.startGroup(); + rc.startField("single_element_group", 0); // start writing array contents + + 
rc.startGroup(); + rc.startField("count", 0); + rc.addLong(1234L); + rc.endField("count", 0); + rc.endGroup(); + + rc.startGroup(); + rc.startField("count", 0); + rc.addLong(2345L); + rc.endField("count", 0); + rc.endGroup(); + + rc.endField("single_element_group", 0); // finished writing array contents + rc.endGroup(); + + rc.endField("single_element_groups", 0); + rc.endMessage(); + } + }); + + ArrayWritable expected = record(list( + new LongWritable(1234L), + new LongWritable(2345L))); + + List records = read(test); + Assert.assertEquals("Should have only one record", 1, records.size()); + assertEquals("Should match expected record", + expected, records.get(0)); + } + + @Test + public void testMultiFieldGroupInList() throws Exception { + // tests the missing element layer, detected by a multi-field group + + Path test = writeDirect("MultiFieldGroupInList", + Types.buildMessage() + .optionalGroup().as(LIST) + .repeatedGroup() + .required(DOUBLE).named("latitude") + .required(DOUBLE).named("longitude") + .named("element") // should not affect schema conversion + .named("locations") + .named("MultiFieldGroupInList"), + new DirectWriter() { + @Override + public void write(RecordConsumer rc) { + rc.startMessage(); + rc.startField("locations", 0); + + rc.startGroup(); + rc.startField("element", 0); + + rc.startGroup(); + rc.startField("latitude", 0); + rc.addDouble(0.0); + rc.endField("latitude", 0); + rc.startField("longitude", 1); + rc.addDouble(0.0); + rc.endField("longitude", 1); + rc.endGroup(); + + rc.startGroup(); + rc.startField("latitude", 0); + rc.addDouble(0.0); + rc.endField("latitude", 0); + rc.startField("longitude", 1); + rc.addDouble(180.0); + rc.endField("longitude", 1); + rc.endGroup(); + + rc.endField("element", 0); + rc.endGroup(); + + rc.endField("locations", 0); + rc.endMessage(); + } + }); + + ArrayWritable expected = record(list( + record(new DoubleWritable(0.0), new DoubleWritable(0.0)), + record(new DoubleWritable(0.0), new DoubleWritable(180.0)))); + + List records = read(test); + Assert.assertEquals("Should have only one record", 1, records.size()); + assertEquals("Should match expected record", + expected, records.get(0)); + } + + @Test + public void testNewOptionalGroupInList() throws Exception { + Path test = writeDirect("NewOptionalGroupInList", + Types.buildMessage() + .optionalGroup().as(LIST) + .repeatedGroup() + .optionalGroup() + .required(DOUBLE).named("latitude") + .required(DOUBLE).named("longitude") + .named("element") + .named("list") + .named("locations") + .named("NewOptionalGroupInList"), + new DirectWriter() { + @Override + public void write(RecordConsumer rc) { + rc.startMessage(); + rc.startField("locations", 0); + + rc.startGroup(); + rc.startField("list", 0); // start writing array contents + + // write a non-null element + rc.startGroup(); // array level + rc.startField("element", 0); + + rc.startGroup(); + rc.startField("latitude", 0); + rc.addDouble(0.0); + rc.endField("latitude", 0); + rc.startField("longitude", 1); + rc.addDouble(0.0); + rc.endField("longitude", 1); + rc.endGroup(); + + rc.endField("element", 0); + rc.endGroup(); // array level + + // write a null element (element field is omitted) + rc.startGroup(); // array level + rc.endGroup(); // array level + + // write a second non-null element + rc.startGroup(); // array level + rc.startField("element", 0); + + rc.startGroup(); + rc.startField("latitude", 0); + rc.addDouble(0.0); + rc.endField("latitude", 0); + rc.startField("longitude", 1); + rc.addDouble(180.0); + 
rc.endField("longitude", 1); + rc.endGroup(); + + rc.endField("element", 0); + rc.endGroup(); // array level + + rc.endField("list", 0); // finished writing array contents + rc.endGroup(); + + rc.endField("locations", 0); + rc.endMessage(); + } + }); + + ArrayWritable expected = record(list( + record(new DoubleWritable(0.0), new DoubleWritable(0.0)), + null, + record(new DoubleWritable(0.0), new DoubleWritable(180.0)))); + + List records = read(test); + Assert.assertEquals("Should have only one record", 1, records.size()); + assertEquals("Should match expected record", + expected, records.get(0)); + } + + @Test + public void testNewRequiredGroupInList() throws Exception { + Path test = writeDirect("NewRequiredGroupInList", + Types.buildMessage() + .optionalGroup().as(LIST) + .repeatedGroup() + .requiredGroup() + .required(DOUBLE).named("latitude") + .required(DOUBLE).named("longitude") + .named("element") + .named("list") + .named("locations") + .named("NewRequiredGroupInList"), + new DirectWriter() { + @Override + public void write(RecordConsumer rc) { + rc.startMessage(); + rc.startField("locations", 0); + + rc.startGroup(); + rc.startField("list", 0); // start writing array contents + + // write a non-null element + rc.startGroup(); // array level + rc.startField("element", 0); + + rc.startGroup(); + rc.startField("latitude", 0); + rc.addDouble(0.0); + rc.endField("latitude", 0); + rc.startField("longitude", 1); + rc.addDouble(180.0); + rc.endField("longitude", 1); + rc.endGroup(); + + rc.endField("element", 0); + rc.endGroup(); // array level + + // write a second non-null element + rc.startGroup(); // array level + rc.startField("element", 0); + + rc.startGroup(); + rc.startField("latitude", 0); + rc.addDouble(0.0); + rc.endField("latitude", 0); + rc.startField("longitude", 1); + rc.addDouble(0.0); + rc.endField("longitude", 1); + rc.endGroup(); + + rc.endField("element", 0); + rc.endGroup(); // array level + + rc.endField("list", 0); // finished writing array contents + rc.endGroup(); + + rc.endField("locations", 0); + rc.endMessage(); + } + }); + + ArrayWritable expected = record(list( + record(new DoubleWritable(0.0), new DoubleWritable(180.0)), + record(new DoubleWritable(0.0), new DoubleWritable(0.0)))); + + List records = read(test); + Assert.assertEquals("Should have only one record", 1, records.size()); + assertEquals("Should match expected record", + expected, records.get(0)); + } + + @Test + public void testHiveRequiredGroupInList() throws Exception { + // this matches the list structure that Hive writes + Path test = writeDirect("HiveRequiredGroupInList", + Types.buildMessage() + .optionalGroup().as(LIST) + .repeatedGroup() + .requiredGroup() + .required(DOUBLE).named("latitude") + .required(DOUBLE).named("longitude") + .named("element") + .named("bag") + .named("locations") + .named("HiveRequiredGroupInList"), + new DirectWriter() { + @Override + public void write(RecordConsumer rc) { + rc.startMessage(); + rc.startField("locations", 0); + + rc.startGroup(); + rc.startField("bag", 0); // start writing array contents + + // write a non-null element + rc.startGroup(); // array level + rc.startField("element", 0); + + rc.startGroup(); + rc.startField("latitude", 0); + rc.addDouble(0.0); + rc.endField("latitude", 0); + rc.startField("longitude", 1); + rc.addDouble(180.0); + rc.endField("longitude", 1); + rc.endGroup(); + + rc.endField("element", 0); + rc.endGroup(); // array level + + // write a second non-null element + rc.startGroup(); // array level + 
rc.startField("element", 0); + + rc.startGroup(); + rc.startField("latitude", 0); + rc.addDouble(0.0); + rc.endField("latitude", 0); + rc.startField("longitude", 1); + rc.addDouble(0.0); + rc.endField("longitude", 1); + rc.endGroup(); + + rc.endField("element", 0); + rc.endGroup(); // array level + + rc.endField("bag", 0); // finished writing array contents + rc.endGroup(); + + rc.endField("locations", 0); + rc.endMessage(); + } + }); + + ArrayWritable expected = record(list( + record(new DoubleWritable(0.0), new DoubleWritable(180.0)), + record(new DoubleWritable(0.0), new DoubleWritable(0.0)))); + + List records = read(test); + Assert.assertEquals("Should have only one record", 1, records.size()); + assertEquals("Should match expected record", + expected, records.get(0)); + } + +} diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestDataWritableWriter.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestDataWritableWriter.java new file mode 100644 index 0000000..a693aff --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestDataWritableWriter.java @@ -0,0 +1,518 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.io.parquet; + +import org.apache.hadoop.hive.ql.io.parquet.write.DataWritableWriter; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.io.*; +import org.junit.Before; +import org.junit.Test; +import org.mockito.InOrder; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; +import parquet.io.api.Binary; +import parquet.io.api.RecordConsumer; +import parquet.schema.MessageType; +import parquet.schema.MessageTypeParser; + +import java.io.UnsupportedEncodingException; + +import static org.junit.Assert.*; +import static org.mockito.Mockito.*; + +public class TestDataWritableWriter { + @Mock private RecordConsumer mockRecordConsumer; + private InOrder inOrder; + + @Before + public void initMocks() { + MockitoAnnotations.initMocks(this); + inOrder = inOrder(mockRecordConsumer); + } + + private void startMessage() { + inOrder.verify(mockRecordConsumer).startMessage(); + } + + private void endMessage() { + inOrder.verify(mockRecordConsumer).endMessage(); + verifyNoMoreInteractions(mockRecordConsumer); + } + + private void startField(String name, int index) { + inOrder.verify(mockRecordConsumer).startField(name, index); + } + + private void endField(String name, int index) { + inOrder.verify(mockRecordConsumer).endField(name, index); + } + + private void addInteger(int value) { + inOrder.verify(mockRecordConsumer).addInteger(value); + } + + private void addFloat(float value) { + inOrder.verify(mockRecordConsumer).addFloat(value); + } + + private void addDouble(double value) { + inOrder.verify(mockRecordConsumer).addDouble(value); + } + + private void addBoolean(boolean value) { + inOrder.verify(mockRecordConsumer).addBoolean(value); + } + + private void addString(String value) { + inOrder.verify(mockRecordConsumer).addBinary(Binary.fromString(value)); + } + + 
private void startGroup() { + inOrder.verify(mockRecordConsumer).startGroup(); + } + + private void endGroup() { + inOrder.verify(mockRecordConsumer).endGroup(); + } + + private Writable createNull() { return null; } + + private IntWritable createInt(int value) { + return new IntWritable(value); + } + + private FloatWritable createFloat(float value) { + return new FloatWritable(value); + } + + private DoubleWritable createDouble(double value) { + return new DoubleWritable(value); + } + + private BooleanWritable createBoolean(boolean value) { + return new BooleanWritable(value); + } + + private BytesWritable createString(String value) throws UnsupportedEncodingException { + return new BytesWritable(value.getBytes("UTF-8")); + } + + private ArrayWritable createGroup(Writable...values) { + return new ArrayWritable(Writable.class, values); + } + + private ArrayWritable createArray(Writable...values) { + return new ArrayWritable(Writable.class, createGroup(values).get()); + } + + private void writeParquetRecord(String schemaStr, ArrayWritable record) { + MessageType schema = MessageTypeParser.parseMessageType(schemaStr); + DataWritableWriter hiveParquetWriter = new DataWritableWriter(mockRecordConsumer, schema); + hiveParquetWriter.write(record); + } + + @Test + public void testSimpleType() throws Exception { + String schemaStr = "message hive_schema {\n" + + " optional int32 int;\n" + + " optional double double;\n" + + " optional boolean boolean;\n" + + " optional float float;\n" + + " optional binary string;\n" + + "}\n"; + + ArrayWritable hiveRecord = createGroup( + createInt(1), + createDouble(1.0), + createBoolean(true), + createFloat(1.0f), + createString("one") + ); + + // Write record to Parquet format + writeParquetRecord(schemaStr, hiveRecord); + + // Verify record was written correctly to Parquet + startMessage(); + startField("int", 0); + addInteger(1); + endField("int", 0); + startField("double", 1); + addDouble(1.0); + endField("double", 1); + startField("boolean", 2); + addBoolean(true); + endField("boolean", 2); + startField("float", 3); + addFloat(1.0f); + endField("float", 3); + startField("string", 4); + addString("one"); + endField("string", 4); + endMessage(); + } + + @Test + public void testStructType() throws Exception { + String schemaStr = "message hive_schema {\n" + + " optional group structCol {\n" + + " optional int32 a;\n" + + " optional double b;\n" + + " optional boolean c;\n" + + " }\n" + + "}\n"; + + ArrayWritable hiveRecord = createGroup( + createGroup( + createInt(1), + createDouble(1.0), + createBoolean(true) + ) + ); + + // Write record to Parquet format + writeParquetRecord(schemaStr, hiveRecord); + + // Verify record was written correctly to Parquet + startMessage(); + startField("structCol", 0); + startGroup(); + startField("a", 0); + addInteger(1); + endField("a", 0); + startField("b", 1); + addDouble(1.0); + endField("b", 1); + startField("c", 2); + addBoolean(true); + endField("c", 2); + endGroup(); + endField("structCol", 0); + endMessage(); + } + + @Test + public void testArrayType() throws Exception { + String schemaStr = "message hive_schema {\n" + + " optional group arrayCol (LIST) {\n" + + " repeated group bag {\n" + + " optional int32 array_element;\n" + + " }\n" + + " }\n" + + "}\n"; + + ArrayWritable hiveRecord = createGroup( + createGroup( + createArray( + createInt(1), + createNull(), + createInt(2) + ) + ) + ); + + // Write record to Parquet format + writeParquetRecord(schemaStr, hiveRecord); + + // Verify record was written correctly to 
Parquet + startMessage(); + startField("arrayCol", 0); + startGroup(); + startField("bag", 0); + startGroup(); + startField("array_element", 0); + addInteger(1); + endField("array_element", 0); + endGroup(); + startGroup(); + endGroup(); + startGroup(); + startField("array_element", 0); + addInteger(2); + endField("array_element", 0); + endGroup(); + endField("bag", 0); + endGroup(); + endField("arrayCol", 0); + endMessage(); + } + + @Test + public void testMapType() throws Exception { + String schemaStr = "message hive_schema {\n" + + " optional group mapCol (MAP) {\n" + + " repeated group map (MAP_KEY_VALUE) {\n" + + " required binary key;\n" + + " optional int32 value;\n" + + " }\n" + + " }\n" + + "}\n"; + + ArrayWritable hiveRecord = createGroup( + createGroup( + createArray( + createArray( + createString("key1"), + createInt(1) + ), + createArray( + createString("key2"), + createInt(2) + ), + createArray( + createString("key3"), + createNull() + ) + ) + ) + ); + + // Write record to Parquet format + writeParquetRecord(schemaStr, hiveRecord); + + // Verify record was written correctly to Parquet + startMessage(); + startField("mapCol", 0); + startGroup(); + startField("map", 0); + startGroup(); + startField("key", 0); + addString("key1"); + endField("key", 0); + startField("value", 1); + addInteger(1); + endField("value", 1); + endGroup(); + startGroup(); + startField("key", 0); + addString("key2"); + endField("key", 0); + startField("value", 1); + addInteger(2); + endField("value", 1); + endGroup(); + startGroup(); + startField("key", 0); + addString("key3"); + endField("key", 0); + endGroup(); + endField("map", 0); + endGroup(); + endField("mapCol", 0); + endMessage(); + } + + @Test + public void testArrayOfArrays() throws Exception { + String schemaStr = "message hive_schema {\n" + + " optional group array_of_arrays (LIST) {\n" + + " repeated group array {\n" + + " required group element (LIST) {\n" + + " repeated group array {\n" + + " required int32 element;\n" + + " }\n" + + " }\n" + + " }\n" + + " }\n" + + "}\n"; + + ArrayWritable hiveRecord = createGroup( + createGroup( + createArray( + createGroup( + createArray( + createInt(1), + createInt(2) + ) + ) + ) + ) + ); + + // Write record to Parquet format + writeParquetRecord(schemaStr, hiveRecord); + + // Verify record was written correctly to Parquet + startMessage(); + startField("array_of_arrays", 0); + startGroup(); + startField("array", 0); + startGroup(); + startField("element", 0); + startGroup(); + startField("array", 0); + startGroup(); + startField("element", 0); + addInteger(1); + endField("element", 0); + endGroup(); + startGroup(); + startField("element", 0); + addInteger(2); + endField("element", 0); + endGroup(); + endField("array", 0); + endGroup(); + endField("element", 0); + endGroup(); + endField("array", 0); + endGroup(); + endField("array_of_arrays", 0); + endMessage(); + } + + @Test + public void testGroupFieldIsNotArrayWritable() throws Exception { + String schemaStr = "message hive_schema {\n" + + " optional group a {\n" + + " optional int32 b;\n" + + " }\n" + + "}\n"; + + ArrayWritable hiveRecord = createGroup( + createInt(1) + ); + + try { + // Write record to Parquet format + writeParquetRecord(schemaStr, hiveRecord); + fail(); + } catch (RuntimeException e) { + assertEquals("Parquet record is malformed: Field value is not an ArrayWritable object: " + + "optional group a {\n optional int32 b;\n}", e.getMessage()); + } + } + + @Test + public void testArrayGroupElementIsNotArrayWritable() throws Exception 
{ + String schemaStr = "message hive_schema {\n" + + " optional group array_of_arrays (LIST) {\n" + + " repeated group array {\n" + + " required group element (LIST) {\n" + + " required int32 element;\n" + + " }\n" + + " }\n" + + " }\n" + + "}\n"; + + ArrayWritable hiveRecord = createGroup( + createGroup( + createArray( + createInt(1) + ) + ) + ); + + try { + // Write record to Parquet format + writeParquetRecord(schemaStr, hiveRecord); + fail(); + } catch (RuntimeException e) { + assertEquals("Parquet record is malformed: Field value is not an ArrayWritable object: " + + "required group element (LIST) {\n required int32 element;\n}", e.getMessage()); + } + } + + @Test + public void testMapElementIsNotArrayWritable() throws Exception { + String schemaStr = "message hive_schema {\n" + + " optional group mapCol (MAP) {\n" + + " repeated group map (MAP_KEY_VALUE) {\n" + + " required binary key;\n" + + " optional group value {\n" + + " required int32 value;" + + " }\n" + + " }\n" + + " }\n" + + "}\n"; + + ArrayWritable hiveRecord = createGroup( + createGroup( + createArray( + createGroup( + createString("key1"), + createInt(1) + ) + ) + ) + ); + + try { + // Write record to Parquet format + writeParquetRecord(schemaStr, hiveRecord); + fail(); + } catch (RuntimeException e) { + assertEquals( + "Parquet record is malformed: Field value is not an ArrayWritable object: " + + "optional group value {\n required int32 value;\n}", e.getMessage()); + } + } + + @Test + public void testMapKeyValueIsNotArrayWritable() throws Exception { + String schemaStr = "message hive_schema {\n" + + " optional group mapCol (MAP) {\n" + + " repeated group map (MAP_KEY_VALUE) {\n" + + " required binary key;\n" + + " optional int32 value;\n" + + " }\n" + + " }\n" + + "}\n"; + + ArrayWritable hiveRecord = createGroup( + createGroup( + createArray( + createString("key1"), + createInt(1) + ) + ) + ); + + try { + // Write record to Parquet format + writeParquetRecord(schemaStr, hiveRecord); + fail(); + } catch (RuntimeException e) { + assertEquals("Parquet record is malformed: Map key-value pair is not an ArrayWritable object on record 0", e.getMessage()); + } + } + + @Test + public void testMapKeyValueIsNull() throws Exception { + String schemaStr = "message hive_schema {\n" + + " optional group mapCol (MAP) {\n" + + " repeated group map (MAP_KEY_VALUE) {\n" + + " required binary key;\n" + + " optional int32 value;\n" + + " }\n" + + " }\n" + + "}\n"; + + ArrayWritable hiveRecord = createGroup( + createGroup( + createArray( + createNull() + ) + ) + ); + + try { + // Write record to Parquet format + writeParquetRecord(schemaStr, hiveRecord); + fail(); + } catch (RuntimeException e) { + assertEquals("Parquet record is malformed: Map key-value pair is null on record 0", e.getMessage()); + } + } +} diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestMapStructures.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestMapStructures.java new file mode 100644 index 0000000..ca48050 --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestMapStructures.java @@ -0,0 +1,544 @@ +package org.apache.hadoop.hive.ql.io.parquet; + +import java.util.Arrays; +import java.util.List; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.io.ArrayWritable; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.Text; +import org.junit.Assert; +import org.junit.Test; +import parquet.io.api.Binary; +import parquet.io.api.RecordConsumer; 
+import parquet.schema.Types;
+
+import static parquet.schema.OriginalType.*;
+import static parquet.schema.PrimitiveType.PrimitiveTypeName.*;
+
+public class TestMapStructures extends AbstractTestParquetDirect {
+
+  @Test
+  public void testStringMapRequiredPrimitive() throws Exception {
+    Path test = writeDirect("StringMapRequiredPrimitive",
+        Types.buildMessage()
+            .optionalGroup().as(MAP)
+                .repeatedGroup()
+                    .required(BINARY).as(UTF8).named("key")
+                    .required(INT32).named("value")
+                    .named("key_value")
+                .named("votes")
+            .named("StringMapRequiredPrimitive"),
+        new TestArrayCompatibility.DirectWriter() {
+          @Override
+          public void write(RecordConsumer rc) {
+            rc.startMessage();
+            rc.startField("votes", 0);
+
+            rc.startGroup();
+            rc.startField("key_value", 0);
+
+            rc.startGroup();
+            rc.startField("key", 0);
+            rc.addBinary(Binary.fromString("lettuce"));
+            rc.endField("key", 0);
+            rc.startField("value", 1);
+            rc.addInteger(34);
+            rc.endField("value", 1);
+            rc.endGroup();
+
+            rc.startGroup();
+            rc.startField("key", 0);
+            rc.addBinary(Binary.fromString("cabbage"));
+            rc.endField("key", 0);
+            rc.startField("value", 1);
+            rc.addInteger(18);
+            rc.endField("value", 1);
+            rc.endGroup();
+
+            rc.endField("key_value", 0);
+            rc.endGroup();
+
+            rc.endField("votes", 0);
+            rc.endMessage();
+          }
+        });
+
+    ArrayWritable expected = record(list(
+        record(new Text("lettuce"), new IntWritable(34)),
+        record(new Text("cabbage"), new IntWritable(18))));
+
+    List<ArrayWritable> records = read(test);
+    Assert.assertEquals("Should have only one record", 1, records.size());
+    assertEquals("Should match expected record",
+        expected, records.get(0));
+
+    deserialize(records.get(0),
+        Arrays.asList("votes"),
+        Arrays.asList("map<string,int>"));
+  }
+
+  @Test
+  public void testStringMapOptionalPrimitive() throws Exception {
+    Path test = writeDirect("StringMapOptionalPrimitive",
+        Types.buildMessage()
+            .optionalGroup().as(MAP)
+                .repeatedGroup()
+                    .required(BINARY).as(UTF8).named("key")
+                    .optional(INT32).named("value")
+                    .named("key_value")
+                .named("votes")
+            .named("StringMapOptionalPrimitive"),
+        new TestArrayCompatibility.DirectWriter() {
+          @Override
+          public void write(RecordConsumer rc) {
+            rc.startMessage();
+            rc.startField("votes", 0);
+
+            rc.startGroup();
+            rc.startField("key_value", 0);
+
+            rc.startGroup();
+            rc.startField("key", 0);
+            rc.addBinary(Binary.fromString("lettuce"));
+            rc.endField("key", 0);
+            rc.startField("value", 1);
+            rc.addInteger(34);
+            rc.endField("value", 1);
+            rc.endGroup();
+
+            rc.startGroup();
+            rc.startField("key", 0);
+            rc.addBinary(Binary.fromString("kale"));
+            rc.endField("key", 0);
+            // no value for kale
+            rc.endGroup();
+
+            rc.startGroup();
+            rc.startField("key", 0);
+            rc.addBinary(Binary.fromString("cabbage"));
+            rc.endField("key", 0);
+            rc.startField("value", 1);
+            rc.addInteger(18);
+            rc.endField("value", 1);
+            rc.endGroup();
+
+            rc.endField("key_value", 0);
+            rc.endGroup();
+
+            rc.endField("votes", 0);
+            rc.endMessage();
+          }
+        });
+
+    ArrayWritable expected = record(list(
+        record(new Text("lettuce"), new IntWritable(34)),
+        record(new Text("kale"), null),
+        record(new Text("cabbage"), new IntWritable(18))));
+
+    List<ArrayWritable> records = read(test);
+    Assert.assertEquals("Should have only one record", 1, records.size());
+    assertEquals("Should match expected record",
+        expected, records.get(0));
+
+    deserialize(records.get(0),
+        Arrays.asList("votes"),
+        Arrays.asList("map<string,int>"));
+  }
+
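+  // Both tests above exercise the standard Parquet MAP layout built by
+  // Types.buildMessage():
+  //
+  //   optional group votes (MAP) {
+  //     repeated group key_value {
+  //       required binary key (UTF8);
+  //       int32 value;            // required or optional, per test
+  //     }
+  //   }
+  //
+  // Each startGroup()/endGroup() pair written to the RecordConsumer is one
+  // key_value entry; omitting the optional "value" field yields a null value
+  // for that key ("kale" above).
+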
writeDirect("StringMapOfOptionalArray", + Types.buildMessage() + .optionalGroup().as(MAP) + .repeatedGroup() + .required(BINARY).as(UTF8).named("key") + .optionalGroup().as(LIST) + .repeatedGroup() + .optional(BINARY).as(UTF8).named("element") + .named("list") + .named("value") + .named("key_value") + .named("examples") + .named("StringMapOfOptionalArray"), + new TestArrayCompatibility.DirectWriter() { + @Override + public void write(RecordConsumer rc) { + rc.startMessage(); + rc.startField("examples", 0); + + rc.startGroup(); + rc.startField("key_value", 0); + + rc.startGroup(); + rc.startField("key", 0); + rc.addBinary(Binary.fromString("green")); + rc.endField("key", 0); + rc.startField("value", 1); + rc.startGroup(); + rc.startField("list", 0); + rc.startGroup(); + rc.startField("element", 0); + rc.addBinary(Binary.fromString("lettuce")); + rc.endField("element", 0); + rc.endGroup(); + rc.startGroup(); + rc.startField("element", 0); + rc.addBinary(Binary.fromString("kale")); + rc.endField("element", 0); + rc.endGroup(); + rc.startGroup(); + // adds a null element + rc.endGroup(); + rc.endField("list", 0); + rc.endGroup(); + rc.endField("value", 1); + rc.endGroup(); + + rc.startGroup(); + rc.startField("key", 0); + rc.addBinary(Binary.fromString("brown")); + rc.endField("key", 0); + // no values array + rc.endGroup(); + + rc.endField("key_value", 0); + rc.endGroup(); + + rc.endField("examples", 0); + rc.endMessage(); + } + }); + + ArrayWritable expected = record(list( + record(new Text("green"), list(new Text("lettuce"), new Text("kale"), null)), + record(new Text("brown"), null))); + + List records = read(test); + Assert.assertEquals("Should have only one record", 1, records.size()); + assertEquals("Should match expected record", + expected, records.get(0)); + + deserialize(records.get(0), + Arrays.asList("examples"), + Arrays.asList("map>")); + } + + @Test + public void testStringMapOfOptionalIntArray() throws Exception { + // tests a multimap structure for PARQUET-26 + + Path test = writeDirect("StringMapOfOptionalIntArray", + Types.buildMessage() + .optionalGroup().as(MAP) + .repeatedGroup() + .required(BINARY).as(UTF8).named("key") + .optionalGroup().as(LIST) + .repeatedGroup() + .optional(INT32).named("element") + .named("list") + .named("value") + .named("key_value") + .named("examples") + .named("StringMapOfOptionalIntArray"), + new TestArrayCompatibility.DirectWriter() { + @Override + public void write(RecordConsumer rc) { + rc.startMessage(); + rc.startField("examples", 0); + + rc.startGroup(); + rc.startField("key_value", 0); + + rc.startGroup(); + rc.startField("key", 0); + rc.addBinary(Binary.fromString("low")); + rc.endField("key", 0); + rc.startField("value", 1); + rc.startGroup(); + rc.startField("list", 0); + rc.startGroup(); + rc.startField("element", 0); + rc.addInteger(34); + rc.endField("element", 0); + rc.endGroup(); + rc.startGroup(); + rc.startField("element", 0); + rc.addInteger(35); + rc.endField("element", 0); + rc.endGroup(); + rc.startGroup(); + // adds a null element + rc.endGroup(); + rc.endField("list", 0); + rc.endGroup(); + rc.endField("value", 1); + rc.endGroup(); + + rc.startGroup(); + rc.startField("key", 0); + rc.addBinary(Binary.fromString("high")); + rc.endField("key", 0); + rc.startField("value", 1); + rc.startGroup(); + rc.startField("list", 0); + rc.startGroup(); + rc.startField("element", 0); + rc.addInteger(340); + rc.endField("element", 0); + rc.endGroup(); + rc.startGroup(); + rc.startField("element", 0); + rc.addInteger(360); + 
rc.endField("element", 0); + rc.endGroup(); + rc.endField("list", 0); + rc.endGroup(); + rc.endField("value", 1); + rc.endGroup(); + + rc.endField("key_value", 0); + rc.endGroup(); + + rc.endField("examples", 0); + rc.endMessage(); + } + }); + + ArrayWritable expected = record(list( + record(new Text("low"), list(new IntWritable(34), new IntWritable(35), null)), + record(new Text("high"), list(new IntWritable(340), new IntWritable(360))))); + + List records = read(test); + Assert.assertEquals("Should have only one record", 1, records.size()); + assertEquals("Should match expected record", + expected, records.get(0)); + + deserialize(records.get(0), + Arrays.asList("examples"), + Arrays.asList("map>")); + } + + @Test + public void testMapWithComplexKey() throws Exception { + Path test = writeDirect("MapWithComplexKey", + Types.buildMessage() + .optionalGroup().as(MAP) + .repeatedGroup() + .requiredGroup() + .required(INT32).named("x") + .required(INT32).named("y") + .named("key") + .optional(DOUBLE).named("value") + .named("key_value") + .named("matrix") + .named("MapWithComplexKey"), + new TestArrayCompatibility.DirectWriter() { + @Override + public void write(RecordConsumer rc) { + rc.startMessage(); + rc.startField("matrix", 0); + + rc.startGroup(); + rc.startField("key_value", 0); + + rc.startGroup(); + rc.startField("key", 0); + rc.startGroup(); + rc.startField("x", 0); + rc.addInteger(7); + rc.endField("x", 0); + rc.startField("y", 1); + rc.addInteger(22); + rc.endField("y", 1); + rc.endGroup(); + rc.endField("key", 0); + rc.startField("value", 1); + rc.addDouble(3.14); + rc.endField("value", 1); + rc.endGroup(); + + rc.endField("key_value", 0); + rc.endGroup(); + + rc.endField("matrix", 0); + rc.endMessage(); + } + }); + + ArrayWritable expected = record(list(record( + record(new IntWritable(7), new IntWritable(22)), + new DoubleWritable(3.14)))); + + List records = read(test); + Assert.assertEquals("Should have only one record", 1, records.size()); + assertEquals("Should match expected record", + expected, records.get(0)); + + deserialize(records.get(0), + Arrays.asList("matrix"), + Arrays.asList("map,bigint>")); + } + + @Test + public void testDoubleMapWithStructValue() throws Exception { + Path test = writeDirect("DoubleMapWithStructValue", + Types.buildMessage() + .optionalGroup().as(MAP) + .repeatedGroup() + .optional(DOUBLE).named("key") + .optionalGroup() + .required(INT32).named("x") + .required(INT32).named("y") + .named("value") + .named("key_value") + .named("approx") + .named("DoubleMapWithStructValue"), + new TestArrayCompatibility.DirectWriter() { + @Override + public void write(RecordConsumer rc) { + rc.startMessage(); + rc.startField("approx", 0); + + rc.startGroup(); + rc.startField("key_value", 0); + + rc.startGroup(); + rc.startField("key", 0); + rc.addDouble(3.14); + rc.endField("key", 0); + rc.startField("value", 1); + rc.startGroup(); + rc.startField("x", 0); + rc.addInteger(7); + rc.endField("x", 0); + rc.startField("y", 1); + rc.addInteger(22); + rc.endField("y", 1); + rc.endGroup(); + rc.endField("value", 1); + rc.endGroup(); + + rc.endField("key_value", 0); + rc.endGroup(); + + rc.endField("approx", 0); + rc.endMessage(); + } + }); + + ArrayWritable expected = record(list(record( + new DoubleWritable(3.14), + record(new IntWritable(7), new IntWritable(22))))); + + List records = read(test); + Assert.assertEquals("Should have only one record", 1, records.size()); + assertEquals("Should match expected record", + expected, records.get(0)); + + 
+  @Test
+  public void testDoubleMapWithStructValue() throws Exception {
+    Path test = writeDirect("DoubleMapWithStructValue",
+        Types.buildMessage()
+            .optionalGroup().as(MAP)
+                .repeatedGroup()
+                    .optional(DOUBLE).named("key")
+                    .optionalGroup()
+                        .required(INT32).named("x")
+                        .required(INT32).named("y")
+                        .named("value")
+                    .named("key_value")
+                .named("approx")
+            .named("DoubleMapWithStructValue"),
+        new TestArrayCompatibility.DirectWriter() {
+          @Override
+          public void write(RecordConsumer rc) {
+            rc.startMessage();
+            rc.startField("approx", 0);
+
+            rc.startGroup();
+            rc.startField("key_value", 0);
+
+            rc.startGroup();
+            rc.startField("key", 0);
+            rc.addDouble(3.14);
+            rc.endField("key", 0);
+            rc.startField("value", 1);
+            rc.startGroup();
+            rc.startField("x", 0);
+            rc.addInteger(7);
+            rc.endField("x", 0);
+            rc.startField("y", 1);
+            rc.addInteger(22);
+            rc.endField("y", 1);
+            rc.endGroup();
+            rc.endField("value", 1);
+            rc.endGroup();
+
+            rc.endField("key_value", 0);
+            rc.endGroup();
+
+            rc.endField("approx", 0);
+            rc.endMessage();
+          }
+        });
+
+    ArrayWritable expected = record(list(record(
+        new DoubleWritable(3.14),
+        record(new IntWritable(7), new IntWritable(22)))));
+
+    List<ArrayWritable> records = read(test);
+    Assert.assertEquals("Should have only one record", 1, records.size());
+    assertEquals("Should match expected record",
+        expected, records.get(0));
+
+    deserialize(records.get(0),
+        Arrays.asList("approx"),
+        Arrays.asList("map<double,struct<x:int,y:int>>"));
+  }
+
+  @Test
+  public void testNestedMap() throws Exception {
+    Path test = writeDirect("NestedMap",
+        Types.buildMessage()
+            .optionalGroup().as(MAP)
+                .repeatedGroup()
+                    .optional(BINARY).as(UTF8).named("key")
+                    .optionalGroup().as(MAP)
+                        .repeatedGroup()
+                            .optional(BINARY).as(UTF8).named("key")
+                            .required(INT32).named("value")
+                            .named("key_value")
+                        .named("value")
+                    .named("key_value")
+                .named("map_of_maps")
+            .named("NestedMap"),
+        new TestArrayCompatibility.DirectWriter() {
+          @Override
+          public void write(RecordConsumer rc) {
+            rc.startMessage();
+            rc.startField("map_of_maps", 0);
+
+            rc.startGroup();
+            rc.startField("key_value", 0);
+
+            rc.startGroup();
+            rc.startField("key", 0);
+            rc.addBinary(Binary.fromString("a"));
+            rc.endField("key", 0);
+            rc.startField("value", 1);
+            rc.startGroup();
+            rc.startField("key_value", 0);
+            rc.startGroup();
+            rc.startField("key", 0);
+            rc.addBinary(Binary.fromString("b"));
+            rc.endField("key", 0);
+            rc.startField("value", 1);
+            rc.addInteger(1);
+            rc.endField("value", 1);
+            rc.endGroup();
+            rc.endField("key_value", 0);
+            rc.endGroup();
+            rc.endField("value", 1);
+            rc.endGroup();
+
+            rc.startGroup();
+            rc.startField("key", 0);
+            rc.addBinary(Binary.fromString("b"));
+            rc.endField("key", 0);
+            rc.startField("value", 1);
+            rc.startGroup();
+            rc.startField("key_value", 0);
+            rc.startGroup();
+            rc.startField("key", 0);
+            rc.addBinary(Binary.fromString("a"));
+            rc.endField("key", 0);
+            rc.startField("value", 1);
+            rc.addInteger(-1);
+            rc.endField("value", 1);
+            rc.endGroup();
+            rc.startGroup();
+            rc.startField("key", 0);
+            rc.addBinary(Binary.fromString("b"));
+            rc.endField("key", 0);
+            rc.startField("value", 1);
+            rc.addInteger(-2);
+            rc.endField("value", 1);
+            rc.endGroup();
+            rc.endField("key_value", 0);
+            rc.endGroup();
+            rc.endField("value", 1);
+            rc.endGroup();
+
+            rc.endField("key_value", 0);
+            rc.endGroup();
+
+            rc.endField("map_of_maps", 0);
+            rc.endMessage();
+          }
+        });
+
+    ArrayWritable expected = record(list(
+        record(new Text("a"), list(
+            record(new Text("b"), new IntWritable(1)))),
+        record(new Text("b"), list(
+            record(new Text("a"), new IntWritable(-1)),
+            record(new Text("b"), new IntWritable(-2))))
+        ));
+
+    List<ArrayWritable> records = read(test);
+    Assert.assertEquals("Should have only one record", 1, records.size());
+    assertEquals("Should match expected record",
+        expected, records.get(0));
+
+    deserialize(records.get(0),
+        Arrays.asList("map_of_maps"),
+        Arrays.asList("map<string,map<string,int>>"));
+  }
+}
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java b/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java
index c91644c..323fd33 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java
@@ -22,14 +22,13 @@
 import org.apache.hadoop.hive.common.type.HiveChar;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.common.type.HiveVarchar;
-import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
-import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentImpl.ExpressionBuilder;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentImpl.ExpressionTree;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
 import
org.apache.hadoop.hive.serde2.io.DateWritable; -import org.junit.Test; +import org.junit.*; +import parquet.filter2.predicate.FilterPredicate; import java.beans.XMLDecoder; import java.io.ByteArrayInputStream; @@ -39,6 +38,7 @@ import java.util.Set; import static junit.framework.Assert.assertEquals; +import static junit.framework.Assert.assertNull; import static junit.framework.Assert.assertTrue; /** @@ -47,7 +47,7 @@ * to true and using a custom record reader that prints out the value of * hive.io.filter.expr.serialized in createRecordReader. This should be * replaced by generating the AST using the API and passing that in. - * + *

* In each case, the corresponding part of the where clause is in the * comment above the blob. */ @@ -76,12 +76,11 @@ private ExpressionTree constant(TruthValue val) { /** * Create a predicate leaf. This is used by another test. */ - public static - PredicateLeaf createPredicateLeaf(PredicateLeaf.Operator operator, - PredicateLeaf.Type type, - String columnName, - Object literal, - List literalList) { + public static PredicateLeaf createPredicateLeaf(PredicateLeaf.Operator operator, + PredicateLeaf.Type type, + String columnName, + Object literal, + List literalList) { return new SearchArgumentImpl.PredicateLeafImpl(operator, type, columnName, literal, literalList); } @@ -134,7 +133,7 @@ public void testFlatten() throws Exception { ).toString()); assertEquals("(and leaf-1 leaf-2 leaf-3 leaf-4)", ExpressionBuilder.flatten(and(and(leaf(1), leaf(2)), - and(leaf(3),leaf(4)))).toString()); + and(leaf(3), leaf(4)))).toString()); assertEquals("(or leaf-1 leaf-2 leaf-3 leaf-4)", ExpressionBuilder.flatten(or(leaf(1), or(leaf(2), or(leaf(3), leaf(4))))).toString()); @@ -143,11 +142,11 @@ public void testFlatten() throws Exception { leaf(4))).toString()); assertEquals("(or leaf-1 leaf-2 leaf-3 leaf-4 leaf-5 leaf-6)", ExpressionBuilder.flatten(or(or(leaf(1), or(leaf(2), leaf(3))), - or(or(leaf(4),leaf(5)), leaf(6)))).toString()); + or(or(leaf(4), leaf(5)), leaf(6)))).toString()); assertEquals("(and (not leaf-1) leaf-2 (not leaf-3) leaf-4 (not leaf-5) leaf-6)", ExpressionBuilder.flatten(and(and(not(leaf(1)), and(leaf(2), not(leaf(3)))), and(and(leaf(4), not(leaf(5))), leaf(6))) - ).toString()); + ).toString()); assertEquals("(not (and leaf-1 leaf-2 leaf-3))", ExpressionBuilder.flatten(not(and(leaf(1), and(leaf(2), leaf(3)))) ).toString()); @@ -245,20 +244,20 @@ public void testCNF() throws Exception { private static void assertNoSharedNodes(ExpressionTree tree, Set seen - ) throws Exception { + ) throws Exception { if (seen.contains(tree) && tree.getOperator() != ExpressionTree.Operator.LEAF) { assertTrue("repeated node in expression " + tree, false); } seen.add(tree); if (tree.getChildren() != null) { - for(ExpressionTree child: tree.getChildren()) { + for (ExpressionTree child : tree.getChildren()) { assertNoSharedNodes(child, seen); } } } - private ExprNodeGenericFuncDesc getFuncDesc (String xmlSerialized) { + private ExprNodeGenericFuncDesc getFuncDesc(String xmlSerialized) { byte[] bytes; try { bytes = xmlSerialized.getBytes("UTF-8"); @@ -275,6 +274,7 @@ private ExprNodeGenericFuncDesc getFuncDesc (String xmlSerialized) { decoder.close(); } } + @Test public void testExpression1() throws Exception { // first_name = 'john' or @@ -749,59 +749,85 @@ public void testExpression1() throws Exception { List leaves = sarg.getLeaves(); assertEquals(9, leaves.size()); + FilterPredicate p = sarg.toFilterPredicate(); + String[] conditions = new String[]{ + "eq(first_name, Binary{\"john\"})", /* first_name = 'john' */ + "not(lteq(first_name, Binary{\"greg\"}))", /* 'greg' < first_name */ + "lt(first_name, Binary{\"alan\"})", /* 'alan' > first_name */ + "not(lteq(id, 12))", /* id > 12 or */ + "not(lteq(id, 13))", /* 13 < id or */ + "lt(id, 15)", /* id < 15 or */ + "lt(id, 16)", /* 16 > id or */ + "eq(id, 30)", /* id <=> 30 */ + "eq(first_name, Binary{\"owen\"})" /* first_name <=> 'owen' */ + }; + String expected = String + .format("and(or(or(or(or(or(or(or(%1$s, %2$s), %3$s), %4$s), %5$s), %6$s), %7$s), %8$s), " + + "or(or(or(or(or(or(or(%1$s, %2$s), %3$s), %4$s), %5$s), %6$s), %7$s), %9$s))", conditions); + 
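+    // conditions %1$s-%7$s appear in both halves of the and(); the two OR
+    // chains differ only in the last disjunct (%8$s: id <=> 30 versus
+    // %9$s: first_name <=> 'owen')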
assertEquals(expected, p.toString()); + PredicateLeaf leaf = leaves.get(0); assertEquals(PredicateLeaf.Type.STRING, leaf.getType()); assertEquals(PredicateLeaf.Operator.EQUALS, leaf.getOperator()); assertEquals("first_name", leaf.getColumnName()); - assertEquals("john", leaf.getLiteral()); + assertEquals("john", leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); + assertEquals("john", leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); leaf = leaves.get(1); assertEquals(PredicateLeaf.Type.STRING, leaf.getType()); assertEquals(PredicateLeaf.Operator.LESS_THAN_EQUALS, leaf.getOperator()); assertEquals("first_name", leaf.getColumnName()); - assertEquals("greg", leaf.getLiteral()); + assertEquals("greg", leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); + assertEquals("greg", leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); leaf = leaves.get(2); assertEquals(PredicateLeaf.Type.STRING, leaf.getType()); assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); assertEquals("first_name", leaf.getColumnName()); - assertEquals("alan", leaf.getLiteral()); + assertEquals("alan", leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); + assertEquals("alan", leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); leaf = leaves.get(3); assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); assertEquals(PredicateLeaf.Operator.LESS_THAN_EQUALS, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(12L, leaf.getLiteral()); + assertEquals(12L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); + assertEquals(12, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); leaf = leaves.get(4); assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); assertEquals(PredicateLeaf.Operator.LESS_THAN_EQUALS, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(13L, leaf.getLiteral()); + assertEquals(13L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); + assertEquals(13, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); leaf = leaves.get(5); assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(15L, leaf.getLiteral()); + assertEquals(15L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); + assertEquals(15, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); leaf = leaves.get(6); assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(16L, leaf.getLiteral()); + assertEquals(16L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); + assertEquals(16, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); leaf = leaves.get(7); assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); assertEquals(PredicateLeaf.Operator.NULL_SAFE_EQUALS, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(30L, leaf.getLiteral()); + assertEquals(30L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); + assertEquals(30, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); leaf = leaves.get(8); assertEquals(PredicateLeaf.Type.STRING, leaf.getType()); assertEquals(PredicateLeaf.Operator.NULL_SAFE_EQUALS, leaf.getOperator()); assertEquals("first_name", leaf.getColumnName()); - assertEquals("owen", leaf.getLiteral()); + assertEquals("owen", leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); + assertEquals("owen", leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); assertEquals("(and (or leaf-0 (not leaf-1) leaf-2 (not leaf-3)" + " (not 
leaf-4) leaf-5 leaf-6 leaf-7)" + @@ -1017,30 +1043,46 @@ public void testExpression2() throws Exception { List leaves = sarg.getLeaves(); assertEquals(4, leaves.size()); + String[] conditions = new String[]{ + "eq(first_name, null)", /* first_name is null */ + "not(eq(first_name, Binary{\"sue\"}))", /* first_name <> 'sue' */ + "not(lt(id, 12))", /* id >= 12 */ + "lteq(id, 4)" /* id <= 4 */ + }; + + FilterPredicate p = sarg.toFilterPredicate(); + String expected = String.format("or(or(or(%1$s, %2$s), %3$s), %4$s)", conditions); + assertEquals(expected, p.toString()); + PredicateLeaf leaf = leaves.get(0); assertEquals(PredicateLeaf.Type.STRING, leaf.getType()); assertEquals(PredicateLeaf.Operator.IS_NULL, leaf.getOperator()); assertEquals("first_name", leaf.getColumnName()); - assertEquals(null, leaf.getLiteral()); - assertEquals(null, leaf.getLiteralList()); + assertEquals(null, leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); + assertEquals(null, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); + assertEquals(null, leaf.getLiteralList(PredicateLeaf.FileFormat.ORC)); + assertEquals(null, leaf.getLiteralList(PredicateLeaf.FileFormat.PARQUET)); leaf = leaves.get(1); assertEquals(PredicateLeaf.Type.STRING, leaf.getType()); assertEquals(PredicateLeaf.Operator.EQUALS, leaf.getOperator()); assertEquals("first_name", leaf.getColumnName()); - assertEquals("sue", leaf.getLiteral()); + assertEquals("sue", leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); + assertEquals("sue", leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); leaf = leaves.get(2); assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(12L, leaf.getLiteral()); + assertEquals(12L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); + assertEquals(12, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); leaf = leaves.get(3); assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); assertEquals(PredicateLeaf.Operator.LESS_THAN_EQUALS, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(4L, leaf.getLiteral()); + assertEquals(4L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); + assertEquals(4, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); assertEquals("(or leaf-0 (not leaf-1) (not leaf-2) leaf-3)", sarg.getExpression().toString()); @@ -1436,25 +1478,41 @@ public void testExpression3() throws Exception { List leaves = sarg.getLeaves(); assertEquals(3, leaves.size()); + String[] conditions = new String[]{ + "lt(id, 45)", /* id between 23 and 45 */ + "not(lteq(id, 23))", /* id between 23 and 45 */ + "eq(first_name, Binary{\"alan\"})", /* first_name = 'alan' */ + "eq(last_name, Binary{\"smith\"})" /* 'smith' = last_name */ + }; + + FilterPredicate p = sarg.toFilterPredicate(); + String expected = String.format("and(and(and(%1$s, %2$s), %3$s), %4$s)", conditions); + assertEquals(expected, p.toString()); + PredicateLeaf leaf = leaves.get(0); assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); assertEquals(PredicateLeaf.Operator.BETWEEN, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(null, leaf.getLiteral()); - assertEquals(23L, leaf.getLiteralList().get(0)); - assertEquals(45L, leaf.getLiteralList().get(1)); + assertEquals(null, leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); + assertEquals(null, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); + assertEquals(23L, leaf.getLiteralList(PredicateLeaf.FileFormat.ORC).get(0)); + assertEquals(23, 
leaf.getLiteralList(PredicateLeaf.FileFormat.PARQUET).get(0)); + assertEquals(45L, leaf.getLiteralList(PredicateLeaf.FileFormat.ORC).get(1)); + assertEquals(45, leaf.getLiteralList(PredicateLeaf.FileFormat.PARQUET).get(1)); leaf = leaves.get(1); assertEquals(PredicateLeaf.Type.STRING, leaf.getType()); assertEquals(PredicateLeaf.Operator.EQUALS, leaf.getOperator()); assertEquals("first_name", leaf.getColumnName()); - assertEquals("alan", leaf.getLiteral()); + assertEquals("alan", leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); + assertEquals("alan", leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); leaf = leaves.get(2); assertEquals(PredicateLeaf.Type.STRING, leaf.getType()); assertEquals(PredicateLeaf.Operator.EQUALS, leaf.getOperator()); assertEquals("last_name", leaf.getColumnName()); - assertEquals("smith", leaf.getLiteral()); + assertEquals("smith", leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); + assertEquals("smith", leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); assertEquals("(and leaf-0 leaf-1 leaf-2)", sarg.getExpression().toString()); @@ -1646,25 +1704,41 @@ id in (34,50) */ List leaves = sarg.getLeaves(); assertEquals(3, leaves.size()); + String[] conditions = new String[]{ + "not(eq(id, 12))", /* id <> 12 */ + "or(eq(first_name, Binary{\"john\"}), eq(first_name, Binary{\"sue\"}))", /* first_name in + ('john', 'sue') */ + "or(eq(id, 34), eq(id, 50))" /* id in (34,50) */ + }; + + FilterPredicate p = sarg.toFilterPredicate(); + String expected = String.format("and(and(%1$s, %2$s), %3$s)", conditions); + assertEquals(expected, p.toString()); + PredicateLeaf leaf = leaves.get(0); assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); assertEquals(PredicateLeaf.Operator.EQUALS, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(12L, leaf.getLiteral()); + assertEquals(12L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); + assertEquals(12, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); leaf = leaves.get(1); assertEquals(PredicateLeaf.Type.STRING, leaf.getType()); assertEquals(PredicateLeaf.Operator.IN, leaf.getOperator()); assertEquals("first_name", leaf.getColumnName()); - assertEquals("john", leaf.getLiteralList().get(0)); - assertEquals("sue", leaf.getLiteralList().get(1)); + assertEquals("john", leaf.getLiteralList(PredicateLeaf.FileFormat.ORC).get(0)); + assertEquals("sue", leaf.getLiteralList(PredicateLeaf.FileFormat.ORC).get(1)); + assertEquals("john", leaf.getLiteralList(PredicateLeaf.FileFormat.PARQUET).get(0)); + assertEquals("sue", leaf.getLiteralList(PredicateLeaf.FileFormat.PARQUET).get(1)); leaf = leaves.get(2); assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); assertEquals(PredicateLeaf.Operator.IN, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(34L, leaf.getLiteralList().get(0)); - assertEquals(50L, leaf.getLiteralList().get(1)); + assertEquals(34L, leaf.getLiteralList(PredicateLeaf.FileFormat.ORC).get(0)); + assertEquals(50L, leaf.getLiteralList(PredicateLeaf.FileFormat.ORC).get(1)); + assertEquals(34, leaf.getLiteralList(PredicateLeaf.FileFormat.PARQUET).get(0)); + assertEquals(50, leaf.getLiteralList(PredicateLeaf.FileFormat.PARQUET).get(1)); assertEquals("(and (not leaf-0) leaf-1 leaf-2)", sarg.getExpression().toString()); @@ -1901,12 +1975,17 @@ public void testExpression5() throws Exception { List leaves = sarg.getLeaves(); assertEquals(1, leaves.size()); + FilterPredicate p = sarg.toFilterPredicate(); + String expected = + "and(lt(first_name, Binary{\"greg\"}), not(lteq(first_name, 
Binary{\"david\"})))"; + assertEquals(p.toString(), expected); + assertEquals(PredicateLeaf.Type.STRING, leaves.get(0).getType()); assertEquals(PredicateLeaf.Operator.BETWEEN, leaves.get(0).getOperator()); assertEquals("first_name", leaves.get(0).getColumnName()); - assertEquals("david", leaves.get(0).getLiteralList().get(0)); - assertEquals("greg", leaves.get(0).getLiteralList().get(1)); + assertEquals("david", leaves.get(0).getLiteralList(PredicateLeaf.FileFormat.ORC).get(0)); + assertEquals("greg", leaves.get(0).getLiteralList(PredicateLeaf.FileFormat.ORC).get(1)); assertEquals("leaf-0", sarg.getExpression().toString()); @@ -2378,59 +2457,90 @@ public void testExpression7() throws Exception { List leaves = sarg.getLeaves(); assertEquals(9, leaves.size()); + FilterPredicate p = sarg.toFilterPredicate(); + String expected = "and(and(and(and(and(and(and(and(and(and(and(and(and(and(and(and(and(" + + "or(or(or(lt(id, 18), lt(id, 10)), lt(id, 13)), lt(id, 16)), " + + "or(or(or(lt(id, 18), lt(id, 11)), lt(id, 13)), lt(id, 16))), " + + "or(or(or(lt(id, 18), lt(id, 12)), lt(id, 13)), lt(id, 16))), " + + "or(or(or(lt(id, 18), lt(id, 10)), lt(id, 14)), lt(id, 16))), " + + "or(or(or(lt(id, 18), lt(id, 11)), lt(id, 14)), lt(id, 16))), " + + "or(or(or(lt(id, 18), lt(id, 12)), lt(id, 14)), lt(id, 16))), " + + "or(or(or(lt(id, 18), lt(id, 10)), lt(id, 15)), lt(id, 16))), " + + "or(or(or(lt(id, 18), lt(id, 11)), lt(id, 15)), lt(id, 16))), " + + "or(or(or(lt(id, 18), lt(id, 12)), lt(id, 15)), lt(id, 16))), " + + "or(or(or(lt(id, 18), lt(id, 10)), lt(id, 13)), lt(id, 17))), " + + "or(or(or(lt(id, 18), lt(id, 11)), lt(id, 13)), lt(id, 17))), " + + "or(or(or(lt(id, 18), lt(id, 12)), lt(id, 13)), lt(id, 17))), " + + "or(or(or(lt(id, 18), lt(id, 10)), lt(id, 14)), lt(id, 17))), " + + "or(or(or(lt(id, 18), lt(id, 11)), lt(id, 14)), lt(id, 17))), " + + "or(or(or(lt(id, 18), lt(id, 12)), lt(id, 14)), lt(id, 17))), " + + "or(or(or(lt(id, 18), lt(id, 10)), lt(id, 15)), lt(id, 17))), " + + "or(or(or(lt(id, 18), lt(id, 11)), lt(id, 15)), lt(id, 17))), " + + "or(or(or(lt(id, 18), lt(id, 12)), lt(id, 15)), lt(id, 17)))"; + assertEquals(p.toString(), expected); + PredicateLeaf leaf = leaves.get(0); assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(18L, leaf.getLiteral()); + assertEquals(18L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); + assertEquals(18, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); leaf = leaves.get(1); assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(10L, leaf.getLiteral()); + assertEquals(10L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); + assertEquals(10, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); leaf = leaves.get(2); assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(13L, leaf.getLiteral()); + assertEquals(13L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); + assertEquals(13, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); leaf = leaves.get(3); assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(16L, leaf.getLiteral()); + assertEquals(16L, 
leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); + assertEquals(16, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); leaf = leaves.get(4); assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(11L, leaf.getLiteral()); + assertEquals(11L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); + assertEquals(11, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); leaf = leaves.get(5); assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(12L, leaf.getLiteral()); + assertEquals(12L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); + assertEquals(12, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); leaf = leaves.get(6); assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(14L, leaf.getLiteral()); + assertEquals(14L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); + assertEquals(14, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); leaf = leaves.get(7); assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(15L, leaf.getLiteral()); + assertEquals(15L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); + assertEquals(15, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); leaf = leaves.get(8); assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(17L, leaf.getLiteral()); + assertEquals(17L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); + assertEquals(17, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); assertEquals("(and" + " (or leaf-0 leaf-1 leaf-2 leaf-3)" + @@ -2512,6 +2622,9 @@ public void testExpression8() throws Exception { List leaves = sarg.getLeaves(); assertEquals(0, leaves.size()); + FilterPredicate p = sarg.toFilterPredicate(); + assertNull(p); + assertEquals("YES_NO_NULL", sarg.getExpression().toString()); } @@ -2648,115 +2761,115 @@ public void testExpression9() throws Exception { public void testExpression10() throws Exception { /* id >= 10 and not (10 > id) */ String exprStr = " \n" + - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " id \n"+ - " \n"+ - " \n"+ - " orc_people \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " int \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " 10 \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " boolean \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " id \n"+ - " \n"+ - " \n"+ - " orc_people \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " 10 \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ - " \n"+ + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " 
\n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " id \n" + + " \n" + + " \n" + + " orc_people \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " int \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " 10 \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " boolean \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " id \n" + + " \n" + + " \n" + + " orc_people \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " 10 \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + ""; SearchArgumentImpl sarg = @@ -2764,11 +2877,16 @@ public void testExpression10() throws Exception { List leaves = sarg.getLeaves(); assertEquals(1, leaves.size()); + FilterPredicate p = sarg.toFilterPredicate(); + String expected = "and(not(lt(id, 10)), not(lt(id, 10)))"; + assertEquals(expected, p.toString()); + assertEquals(PredicateLeaf.Type.INTEGER, leaves.get(0).getType()); assertEquals(PredicateLeaf.Operator.LESS_THAN, leaves.get(0).getOperator()); assertEquals("id", leaves.get(0).getColumnName()); - assertEquals(10L, leaves.get(0).getLiteral()); + assertEquals(10L, leaves.get(0).getLiteral(PredicateLeaf.FileFormat.ORC)); + assertEquals(10, leaves.get(0).getLiteral(PredicateLeaf.FileFormat.PARQUET)); assertEquals("(and (not leaf-0) (not leaf-0))", sarg.getExpression().toString()); @@ -2792,9 +2910,9 @@ public void testBuilder() throws Exception { SearchArgument sarg = SearchArgumentFactory.newBuilder() .startAnd() - .lessThan("x", 10) - .lessThanEquals("y", "hi") - .equals("z", 1.0) + .lessThan("x", 10) + .lessThanEquals("y", "hi") + .equals("z", 1.0) .end() .build(); assertEquals("leaf-0 = (LESS_THAN x 10)\n" + @@ -2803,12 +2921,12 @@ public void testBuilder() throws Exception { "expr = (and leaf-0 leaf-1 leaf-2)", sarg.toString()); sarg = SearchArgumentFactory.newBuilder() .startNot() - .startOr() - .isNull("x") - .between("y", 10, 20) - .in("z", 1, 2, 3) - .nullSafeEquals("a", "stinger") - .end() + .startOr() + .isNull("x") + .between("y", 10, 20) + .in("z", 1, 2, 3) + .nullSafeEquals("a", "stinger") + .end() .end() .build(); assertEquals("leaf-0 = (IS_NULL x)\n" + @@ -2816,6 +2934,12 @@ public void testBuilder() throws Exception { "leaf-2 = (IN z 1 2 3)\n" + "leaf-3 = (NULL_SAFE_EQUALS a stinger)\n" + "expr = (and (not leaf-0) (not leaf-1) (not leaf-2) (not leaf-3))", sarg.toString()); + + FilterPredicate p = sarg.toFilterPredicate(); + String expected = + "and(and(and(not(eq(x, null)), not(and(lt(y, 20), not(lteq(y, 10))))), not(or(or(eq(z, 1), " + + "eq(z, 2)), eq(z, 3)))), not(eq(a, Binary{\"stinger\"})))"; + assertEquals(expected, p.toString()); } @Test @@ -2823,24 +2947,25 @@ public void testBuilderComplexTypes() throws Exception { SearchArgument sarg = SearchArgumentFactory.newBuilder() .startAnd() - .lessThan("x", new DateWritable(10)) - .lessThanEquals("y", new HiveChar("hi", 10)) - .equals("z", HiveDecimal.create("1.0")) + .lessThan("x", new DateWritable(10)) + .lessThanEquals("y", new HiveChar("hi", 10)) 
+ .equals("z", HiveDecimal.create("1.0")) .end() .build(); assertEquals("leaf-0 = (LESS_THAN x 1970-01-11)\n" + "leaf-1 = (LESS_THAN_EQUALS y hi)\n" + "leaf-2 = (EQUALS z 1)\n" + "expr = (and leaf-0 leaf-1 leaf-2)", sarg.toString()); + assertEquals("lteq(y, Binary{\"hi\"})", sarg.toFilterPredicate().toString()); sarg = SearchArgumentFactory.newBuilder() .startNot() - .startOr() - .isNull("x") - .between("y", HiveDecimal.create(10), 20.0) - .in("z", (byte)1, (short)2, (int)3) - .nullSafeEquals("a", new HiveVarchar("stinger", 100)) - .end() + .startOr() + .isNull("x") + .between("y", HiveDecimal.create(10), 20.0) + .in("z", (byte) 1, (short) 2, (int) 3) + .nullSafeEquals("a", new HiveVarchar("stinger", 100)) + .end() .end() .build(); assertEquals("leaf-0 = (IS_NULL x)\n" + @@ -2848,6 +2973,11 @@ "leaf-2 = (IN z 1 2 3)\n" + "leaf-3 = (NULL_SAFE_EQUALS a stinger)\n" + "expr = (and (not leaf-0) (not leaf-1) (not leaf-2) (not leaf-3))", sarg.toString()); + + FilterPredicate p = sarg.toFilterPredicate(); + String expected = "and(and(not(eq(x, null)), not(or(or(eq(z, 1), eq(z, 2)), eq(z, 3)))), " + "not(eq(a, Binary{\"stinger\"})))"; + assertEquals(expected, p.toString()); } @Test @@ -2864,13 +2994,14 @@ public void testBuilderComplexTypes2() throws Exception { "leaf-1 = (LESS_THAN_EQUALS y hi)\n" + "leaf-2 = (EQUALS z 1.0)\n" + "expr = (and leaf-0 leaf-1 leaf-2)", sarg.toString()); + assertEquals("lteq(y, Binary{\"hi\"})", sarg.toFilterPredicate().toString()); sarg = SearchArgumentFactory.newBuilder() .startNot() .startOr() .isNull("x") .between("y", new BigDecimal(10), 20.0) - .in("z", (byte)1, (short)2, (int)3) + .in("z", (byte) 1, (short) 2, (int) 3) .nullSafeEquals("a", new HiveVarchar("stinger", 100)) .end() .end() .build(); @@ -2880,6 +3011,11 @@ "leaf-2 = (IN z 1 2 3)\n" + "leaf-3 = (NULL_SAFE_EQUALS a stinger)\n" + "expr = (and (not leaf-0) (not leaf-1) (not leaf-2) (not leaf-3))", sarg.toString()); + + FilterPredicate p = sarg.toFilterPredicate(); + String expected = "and(and(not(eq(x, null)), not(or(or(eq(z, 1), eq(z, 2)), eq(z, 3)))), " + "not(eq(a, Binary{\"stinger\"})))"; + assertEquals(expected, p.toString()); } @Test @@ -2900,5 +3036,10 @@ public void testBuilderFloat() throws Exception { "leaf-3 = (EQUALS z 0.22)\n" + "leaf-4 = (EQUALS z1 0.22)\n" + "expr = (and leaf-0 leaf-1 leaf-2 leaf-3 leaf-4)", sarg.toString()); + + FilterPredicate p = sarg.toFilterPredicate(); + String expected = "and(and(and(and(lt(x, 22), lt(x1, 22)), lteq(y, Binary{\"hi\"})), eq(z, " + "0.22)), eq(z1, 0.22))"; + assertEquals(expected, p.toString()); } }
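The assertions above pin down how a SearchArgument maps onto a Parquet FilterPredicate: comparison leaves become lt/lteq/eq nodes, string literals are rendered as Binary{...}, leaves of types Parquet cannot evaluate (dates, decimals) are dropped from the converted predicate, and an expression with no convertible leaves converts to null (see testExpression8). A minimal sketch of the same round trip, assuming only the builder API and the toFilterPredicate() method these tests exercise (the FilterPredicate import path reflects the pre-org.apache Parquet packaging of this era):

import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;
import parquet.filter2.predicate.FilterPredicate;

public class SargToFilterPredicateSketch {
  public static void main(String[] args) {
    // Build (x < 10 AND y <= "hi"), the shape used in testBuilder.
    SearchArgument sarg = SearchArgumentFactory.newBuilder()
        .startAnd()
          .lessThan("x", 10)
          .lessThanEquals("y", "hi")
        .end()
        .build();
    // Convert to a Parquet predicate; expected to print roughly
    //   and(lt(x, 10), lteq(y, Binary{"hi"}))
    FilterPredicate p = sarg.toFilterPredicate();
    System.out.println(p);
  }
}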
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/TestBlockedUdf.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/TestBlockedUdf.java new file mode 100644 index 0000000..e2633de --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/TestBlockedUdf.java @@ -0,0 +1,109 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements.  See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.  The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.udf; + +import static org.junit.Assert.*; + +import java.util.Set; + +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hadoop.hive.ql.exec.FunctionRegistry; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +public class TestBlockedUdf { + + @Before + public void setUp() throws Exception { + } + + @After + public void tearDown() throws Exception { + FunctionRegistry.setupPermissionsForBuiltinUDFs("", ""); + } + + /** + * Verify that built-in UDFs can be accessed with the default (empty) white and black lists + * @throws Exception + */ + @Test + public void testDefaultWhiteList() throws Exception { + assertEquals("", new HiveConf().getVar(ConfVars.HIVE_SERVER2_BUILTIN_UDF_WHITELIST)); + assertEquals("", new HiveConf().getVar(ConfVars.HIVE_SERVER2_BUILTIN_UDF_BLACKLIST)); + FunctionRegistry.setupPermissionsForBuiltinUDFs("", ""); + assertEquals("substr", FunctionRegistry.getFunctionInfo("substr").getDisplayName()); + } + + /** + * Verify that a UDF in the whitelist can be accessed + * @throws Exception + */ + @Test + public void testUdfInWhiteList() throws Exception { + Set<String> funcNames = FunctionRegistry.getFunctionNames(); + funcNames.remove("reflect"); + FunctionRegistry.setupPermissionsForBuiltinUDFs(funcNames.toString(), ""); + assertEquals("substr", FunctionRegistry.getFunctionInfo("substr").getDisplayName()); + } + + /** + * Verify that a UDF not in the whitelist can't be accessed + * @throws Exception + */ + @Test (expected=SemanticException.class) + public void testUdfNotInWhiteList() throws Exception { + Set<String> funcNames = FunctionRegistry.getFunctionNames(); + funcNames.remove("reflect"); + FunctionRegistry.setupPermissionsForBuiltinUDFs(funcNames.toString(), ""); + assertEquals("reflect", FunctionRegistry.getFunctionInfo("reflect").getDisplayName()); + } + + /** + * Verify that a UDF in the blacklist can't be accessed + * @throws Exception + */ + @Test (expected=SemanticException.class) + public void testUdfInBlackList() throws Exception { + FunctionRegistry.setupPermissionsForBuiltinUDFs("", "reflect"); + assertEquals("reflect", FunctionRegistry.getFunctionInfo("reflect").getDisplayName()); + } + + /** + * Verify that a UDF in both the whitelist and the blacklist can't be accessed + * @throws Exception + */ + @Test (expected=SemanticException.class) + public void testUdfInBlackAndWhiteList() throws Exception { + FunctionRegistry.setupPermissionsForBuiltinUDFs("reflect", "reflect"); + assertEquals("reflect", FunctionRegistry.getFunctionInfo("reflect").getDisplayName()); + } + + /** + * Test malformed UDF list settings + */ + @Test (expected=SemanticException.class) + public void testMalformattedListProperty() throws Exception { + FunctionRegistry.setupPermissionsForBuiltinUDFs(",,", " ,reflect,"); + assertEquals("reflect", FunctionRegistry.getFunctionInfo("reflect").getDisplayName()); + } + +}
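TestBlockedUdf drives the registry directly; in a live HiveServer2 the white and black lists come from the two ConfVars the test reads, and a blocked lookup surfaces as a SemanticException from getFunctionInfo. A rough sketch of the same flow, using only the calls that appear in the test:

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.parse.SemanticException;

public class UdfPermissionSketch {
  public static void main(String[] args) throws Exception {
    HiveConf conf = new HiveConf();
    // Both settings default to "", which places no restriction on built-in UDFs.
    FunctionRegistry.setupPermissionsForBuiltinUDFs(
        conf.getVar(ConfVars.HIVE_SERVER2_BUILTIN_UDF_WHITELIST),
        conf.getVar(ConfVars.HIVE_SERVER2_BUILTIN_UDF_BLACKLIST));
    System.out.println(FunctionRegistry.getFunctionInfo("substr").getDisplayName());

    // The blacklist wins even when the same name is also whitelisted.
    FunctionRegistry.setupPermissionsForBuiltinUDFs("reflect", "reflect");
    try {
      FunctionRegistry.getFunctionInfo("reflect");
    } catch (SemanticException blocked) {
      System.out.println("reflect is blocked");
    }
  }
}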
diff --git a/ql/src/test/queries/clientpositive/alter_numbuckets_partitioned_table.q b/ql/src/test/queries/clientpositive/alter_numbuckets_partitioned_table.q index 5dda4c0..627fcc1 100644 --- a/ql/src/test/queries/clientpositive/alter_numbuckets_partitioned_table.q +++ b/ql/src/test/queries/clientpositive/alter_numbuckets_partitioned_table.q @@ -1,4 +1,4 @@ --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) create table tst1(key string, value string) partitioned by (ds string) clustered by (key) into 10 buckets; alter table tst1 clustered by (key) into 8 buckets; diff --git a/ql/src/test/queries/clientpositive/alter_numbuckets_partitioned_table2.q b/ql/src/test/queries/clientpositive/alter_numbuckets_partitioned_table2.q index acc028b..2f26de8 100644 --- a/ql/src/test/queries/clientpositive/alter_numbuckets_partitioned_table2.q +++ b/ql/src/test/queries/clientpositive/alter_numbuckets_partitioned_table2.q @@ -1,4 +1,4 @@ --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) -- Tests that when overwriting a partition in a table after altering the bucketing/sorting metadata -- the partition metadata is updated as well. diff --git a/ql/src/test/queries/clientpositive/alter_numbuckets_partitioned_table2_h23.q b/ql/src/test/queries/clientpositive/alter_numbuckets_partitioned_table2_h23.q index d814304..2c2e184 100644 --- a/ql/src/test/queries/clientpositive/alter_numbuckets_partitioned_table2_h23.q +++ b/ql/src/test/queries/clientpositive/alter_numbuckets_partitioned_table2_h23.q @@ -1,4 +1,4 @@ --- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) -- Tests that when overwriting a partition in a table after altering the bucketing/sorting metadata -- the partition metadata is updated as well. diff --git a/ql/src/test/queries/clientpositive/alter_numbuckets_partitioned_table_h23.q b/ql/src/test/queries/clientpositive/alter_numbuckets_partitioned_table_h23.q index a039925..439f351 100644 --- a/ql/src/test/queries/clientpositive/alter_numbuckets_partitioned_table_h23.q +++ b/ql/src/test/queries/clientpositive/alter_numbuckets_partitioned_table_h23.q @@ -1,4 +1,4 @@ --- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) create table tst1(key string, value string) partitioned by (ds string) clustered by (key) into 10 buckets; alter table tst1 clustered by (key) into 8 buckets; diff --git a/ql/src/test/queries/clientpositive/alter_table_cascade.q b/ql/src/test/queries/clientpositive/alter_table_cascade.q new file mode 100644 index 0000000..479fda4 --- /dev/null +++ b/ql/src/test/queries/clientpositive/alter_table_cascade.q @@ -0,0 +1,137 @@ +SET hive.exec.dynamic.partition = true; +SET hive.exec.dynamic.partition.mode = nonstrict; + +-- SORT_QUERY_RESULTS + +drop table if exists alter_table_src; +drop table if exists alter_table_cascade; + +create table alter_table_src(c1 string, c2 string); +load data local inpath '../../data/files/dec.txt' overwrite into table alter_table_src; + +create table alter_table_cascade (c1 string) partitioned by (p1 string, p2 string); + +insert overwrite table alter_table_cascade partition (p1, p2) + select c1, 'abc', '123' from alter_table_src + union all + select c1, null, '123' from alter_table_src; + +show partitions alter_table_cascade; +describe alter_table_cascade; +describe alter_table_cascade partition (p1='abc', p2='123'); +describe alter_table_cascade partition (p1='__HIVE_DEFAULT_PARTITION__', p2='123'); +select * from alter_table_cascade where p1='abc'; +select * from alter_table_cascade where
p1='__HIVE_DEFAULT_PARTITION__'; + +-- add columns c2 by replace columns (for HIVE-6131) +-- reload data to existing partition __HIVE_DEFAULT_PARTITION__ +-- load data to a new partition xyz +-- querying data (from new or existing partitions) should return non-NULL values for the new column +alter table alter_table_cascade replace columns (c1 string, c2 string) cascade; +load data local inpath '../../data/files/dec.txt' overwrite into table alter_table_cascade partition (p1='__HIVE_DEFAULT_PARTITION__',p2='123'); +load data local inpath '../../data/files/dec.txt' overwrite into table alter_table_cascade partition (p1='xyz', p2='123'); +describe alter_table_cascade; +describe alter_table_cascade partition (p1='xyz', p2='123'); +describe alter_table_cascade partition (p1='abc', p2='123'); +describe alter_table_cascade partition (p1='__HIVE_DEFAULT_PARTITION__', p2='123'); +select * from alter_table_cascade where p1='xyz'; +select * from alter_table_cascade where p1='abc'; +select * from alter_table_cascade where p1='__HIVE_DEFAULT_PARTITION__'; + +-- Change c2 to decimal(10,0); the change should be cascaded to all partitions +-- the c2 value returned should be in decimal(10,0) +alter table alter_table_cascade change c2 c2 decimal(10,0) comment "change datatype" cascade; +describe alter_table_cascade; +describe alter_table_cascade partition (p1='xyz', p2='123'); +describe alter_table_cascade partition (p1='abc', p2='123'); +describe alter_table_cascade partition (p1='__HIVE_DEFAULT_PARTITION__', p2='123'); +select * from alter_table_cascade where p1='xyz'; +select * from alter_table_cascade where p1='abc'; +select * from alter_table_cascade where p1='__HIVE_DEFAULT_PARTITION__'; + +-- rename c1 to c2fromc1 and move it to after c2; the change should be cascaded to all partitions +alter table alter_table_cascade change c1 c2fromc1 string comment "change position after" after c2 cascade; +describe alter_table_cascade; +describe alter_table_cascade partition (p1='xyz', p2='123'); +describe alter_table_cascade partition (p1='abc', p2='123'); +describe alter_table_cascade partition (p1='__HIVE_DEFAULT_PARTITION__', p2='123'); + +-- rename c2fromc1 back to c1 and move it to first position; the change should be cascaded to all partitions +alter table alter_table_cascade change c2fromc1 c1 string comment "change position first" first cascade; +describe alter_table_cascade; +describe alter_table_cascade partition (p1='xyz', p2='123'); +describe alter_table_cascade partition (p1='abc', p2='123'); +describe alter_table_cascade partition (p1='__HIVE_DEFAULT_PARTITION__', p2='123'); + +-- Try out replace columns; the change should be cascaded to all partitions +alter table alter_table_cascade replace columns (c1 string) cascade; +describe alter_table_cascade; +describe alter_table_cascade partition (p1='xyz', p2='123'); +describe alter_table_cascade partition (p1='abc', p2='123'); +describe alter_table_cascade partition (p1='__HIVE_DEFAULT_PARTITION__', p2='123'); +select * from alter_table_cascade where p1='xyz'; +select * from alter_table_cascade where p1='abc'; +select * from alter_table_cascade where p1='__HIVE_DEFAULT_PARTITION__'; + +-- Try add columns; the change should be cascaded to all partitions +alter table alter_table_cascade add columns (c2 decimal(14,4)) cascade; +describe alter_table_cascade; +describe alter_table_cascade partition (p1='xyz', p2='123'); +describe alter_table_cascade partition (p1='abc', p2='123'); +describe alter_table_cascade partition (p1='__HIVE_DEFAULT_PARTITION__', p2='123'); +select * from alter_table_cascade where p1='xyz'; +select * from alter_table_cascade where p1='abc'; +select * from alter_table_cascade where p1='__HIVE_DEFAULT_PARTITION__'; + +-- + +drop table if exists alter_table_restrict; + +create table alter_table_restrict (c1 string) partitioned by (p1 string, p2 string); +insert overwrite table alter_table_restrict partition (p1, p2) + select c1, 'abc', '123' from alter_table_src + union all + select c1, null, '123' from alter_table_src; + +show partitions alter_table_restrict; +describe alter_table_restrict; +describe alter_table_restrict partition (p1='abc', p2='123'); +describe alter_table_restrict partition (p1='__HIVE_DEFAULT_PARTITION__', p2='123'); +select * from alter_table_restrict where p1='abc'; +select * from alter_table_restrict where p1='__HIVE_DEFAULT_PARTITION__'; + +-- add columns c2 by replace columns (for HIVE-6131) without cascade +-- only the table column definition is changed; existing partition definitions are not +-- after replace, only the new partition xyz returns values for the newly added column; existing partitions abc and __HIVE_DEFAULT_PARTITION__ do not +alter table alter_table_restrict replace columns (c1 string, c2 string) restrict; +load data local inpath '../../data/files/dec.txt' overwrite into table alter_table_restrict partition (p1='abc', p2='123'); +load data local inpath '../../data/files/dec.txt' overwrite into table alter_table_restrict partition (p1='__HIVE_DEFAULT_PARTITION__',p2='123'); +load data local inpath '../../data/files/dec.txt' overwrite into table alter_table_restrict partition (p1='xyz', p2='123'); +describe alter_table_restrict; +describe alter_table_restrict partition (p1='xyz', p2='123'); +describe alter_table_restrict partition (p1='abc', p2='123'); +describe alter_table_restrict partition (p1='__HIVE_DEFAULT_PARTITION__', p2='123'); +select * from alter_table_restrict where p1='xyz'; +select * from alter_table_restrict where p1='abc'; +select * from alter_table_restrict where p1='__HIVE_DEFAULT_PARTITION__'; + +-- Change c2 to decimal(10,0); the change is limited to the table and new partitions +alter table alter_table_restrict change c2 c2 decimal(10,0) restrict; +describe alter_table_restrict; +describe alter_table_restrict partition (p1='xyz', p2='123'); +describe alter_table_restrict partition (p1='abc', p2='123'); +describe alter_table_restrict partition (p1='__HIVE_DEFAULT_PARTITION__', p2='123'); + +-- Try out replace columns; the change is limited to the table and new partitions +alter table alter_table_restrict replace columns (c1 string); +describe alter_table_restrict; +describe alter_table_restrict partition (p1='xyz', p2='123'); +describe alter_table_restrict partition (p1='abc', p2='123'); +describe alter_table_restrict partition (p1='__HIVE_DEFAULT_PARTITION__', p2='123'); + +-- Try add columns; the change is limited to the table and new partitions
+alter table alter_table_restrict add columns (c2 decimal(14,4)); +describe alter_table_restrict; +describe alter_table_restrict partition (p1='xyz', p2='123'); +describe alter_table_restrict partition (p1='abc', p2='123'); +describe alter_table_restrict partition (p1='__HIVE_DEFAULT_PARTITION__', p2='123');
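Both halves of the file above hinge on the trailing keyword: CASCADE rewrites the column descriptors of every existing partition along with the table, while RESTRICT (the default) changes only the table-level schema, so pre-existing partitions keep returning NULL for columns they never picked up. A minimal sketch of issuing the same DDL over JDBC, assuming a HiveServer2 at the placeholder address localhost:10000 and a hypothetical new column c9:

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.Statement;

public class AlterCascadeSketch {
  public static void main(String[] args) throws Exception {
    try (Connection conn = DriverManager.getConnection(
             "jdbc:hive2://localhost:10000/default", "", "");
         Statement stmt = conn.createStatement()) {
      // Propagates the new column into every existing partition descriptor.
      stmt.execute("alter table alter_table_cascade add columns (c9 string) cascade");
      // Leaves existing partition descriptors untouched; only the table
      // definition and partitions created afterwards see the new column.
      stmt.execute("alter table alter_table_restrict add columns (c9 string) restrict");
    }
  }
}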
diff --git a/ql/src/test/queries/clientpositive/archive.q b/ql/src/test/queries/clientpositive/archive.q deleted file mode 100644 index a928a81..0000000 --- a/ql/src/test/queries/clientpositive/archive.q +++ /dev/null @@ -1,69 +0,0 @@ -set hive.archive.enabled = true; -set hive.enforce.bucketing = true; - --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) - -drop table tstsrc; -drop table tstsrcpart; - -create table tstsrc like src; -insert overwrite table tstsrc select key, value from src; - -create table tstsrcpart (key string, value string) partitioned by (ds string, hr string) clustered by (key) into 10 buckets; - -insert overwrite table tstsrcpart partition (ds='2008-04-08', hr='11') -select key, value from srcpart where ds='2008-04-08' and hr='11'; - -insert overwrite table tstsrcpart partition (ds='2008-04-08', hr='12') -select key, value from srcpart where ds='2008-04-08' and hr='12'; - -insert overwrite table tstsrcpart partition (ds='2008-04-09', hr='11') -select key, value from srcpart where ds='2008-04-09' and hr='11'; - -insert overwrite table tstsrcpart partition (ds='2008-04-09', hr='12') -select key, value from srcpart where ds='2008-04-09' and hr='12'; - -SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col -FROM (SELECT * FROM tstsrcpart WHERE ds='2008-04-08') subq1) subq2; - -ALTER TABLE tstsrcpart ARCHIVE PARTITION (ds='2008-04-08', hr='12'); - -SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col -FROM (SELECT * FROM tstsrcpart WHERE ds='2008-04-08') subq1) subq2; - -SELECT key, count(1) FROM tstsrcpart WHERE ds='2008-04-08' AND hr='12' AND key='0' GROUP BY key; - -SELECT * FROM tstsrcpart a JOIN tstsrc b ON a.key=b.key -WHERE a.ds='2008-04-08' AND a.hr='12' AND a.key='0'; - -ALTER TABLE tstsrcpart UNARCHIVE PARTITION (ds='2008-04-08', hr='12'); - -SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col -FROM (SELECT * FROM tstsrcpart WHERE ds='2008-04-08') subq1) subq2; - -CREATE TABLE harbucket(key INT) -PARTITIONED by (ds STRING) -CLUSTERED BY (key) INTO 10 BUCKETS; - -INSERT OVERWRITE TABLE harbucket PARTITION(ds='1') SELECT CAST(key AS INT) AS a FROM tstsrc WHERE key < 50; - -SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key; -ALTER TABLE tstsrcpart ARCHIVE PARTITION (ds='2008-04-08', hr='12'); -SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key; -ALTER TABLE tstsrcpart UNARCHIVE PARTITION (ds='2008-04-08', hr='12'); -SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key; - - -CREATE TABLE old_name(key INT) -PARTITIONED by (ds STRING); - -INSERT OVERWRITE TABLE old_name PARTITION(ds='1') SELECT CAST(key AS INT) AS a FROM tstsrc WHERE key < 50; -ALTER TABLE old_name ARCHIVE PARTITION (ds='1'); -SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col -FROM (SELECT * FROM old_name WHERE ds='1') subq1) subq2; -ALTER TABLE old_name RENAME TO new_name; -SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col -FROM (SELECT * FROM new_name WHERE ds='1') subq1) subq2; - -drop table tstsrc; -drop table tstsrcpart; diff --git a/ql/src/test/queries/clientpositive/archive_corrupt.q b/ql/src/test/queries/clientpositive/archive_corrupt.q deleted file mode 100644 index cc9801d..0000000 --- a/ql/src/test/queries/clientpositive/archive_corrupt.q +++ /dev/null @@ -1,35 +0,0 @@ -USE default; - -set hive.archive.enabled = true; -set hive.enforce.bucketing = true; - -drop table tstsrcpart; - -create table tstsrcpart like srcpart; - --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) --- The version of GzipCodec provided in Hadoop 0.20 silently ignores --- file format errors. However, versions of Hadoop that include --- HADOOP-6835 (e.g. 0.23 and 1.x) cause a Wrong File Format exception --- to be thrown during the LOAD step.
This behavior is now tested in --- clientnegative/archive_corrupt.q - -load data local inpath '../../data/files/archive_corrupt.rc' overwrite into table tstsrcpart partition (ds='2008-04-08', hr='11'); - -insert overwrite table tstsrcpart partition (ds='2008-04-08', hr='12') -select key, value from srcpart where ds='2008-04-08' and hr='12'; - -insert overwrite table tstsrcpart partition (ds='2008-04-09', hr='11') -select key, value from srcpart where ds='2008-04-09' and hr='11'; - -insert overwrite table tstsrcpart partition (ds='2008-04-09', hr='12') -select key, value from srcpart where ds='2008-04-09' and hr='12'; - -describe extended tstsrcpart partition (ds='2008-04-08', hr='11'); - -alter table tstsrcpart archive partition (ds='2008-04-08', hr='11'); - -describe extended tstsrcpart partition (ds='2008-04-08', hr='11'); - -alter table tstsrcpart unarchive partition (ds='2008-04-08', hr='11'); - diff --git a/ql/src/test/queries/clientpositive/archive_excludeHadoop20.q b/ql/src/test/queries/clientpositive/archive_excludeHadoop20.q index 90757f2..316276a 100644 --- a/ql/src/test/queries/clientpositive/archive_excludeHadoop20.q +++ b/ql/src/test/queries/clientpositive/archive_excludeHadoop20.q @@ -3,8 +3,6 @@ set hive.enforce.bucketing = true; set hive.exec.submitviachild=true; set hive.exec.submit.local.task.via.child=true; --- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20) - drop table tstsrc; drop table tstsrcpart; diff --git a/ql/src/test/queries/clientpositive/auto_join14.q b/ql/src/test/queries/clientpositive/auto_join14.q index b282fb9..bfd942d 100644 --- a/ql/src/test/queries/clientpositive/auto_join14.q +++ b/ql/src/test/queries/clientpositive/auto_join14.q @@ -1,7 +1,7 @@ set hive.auto.convert.join = true; --- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +-- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE; diff --git a/ql/src/test/queries/clientpositive/auto_join14_hadoop20.q b/ql/src/test/queries/clientpositive/auto_join14_hadoop20.q index 235b7c1..99815f5 100644 --- a/ql/src/test/queries/clientpositive/auto_join14_hadoop20.q +++ b/ql/src/test/queries/clientpositive/auto_join14_hadoop20.q @@ -1,7 +1,7 @@ set hive.auto.convert.join = true; --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE; diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_8.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_8.q index a66806f..734d2b3 100644 --- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_8.q +++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_8.q @@ -1,3 +1,6 @@ +set hive.exec.submitviachild=true; +set hive.exec.submit.local.task.via.child=true; + -- small 2 part, 2 bucket & big 2 part, 4 bucket CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; load data local inpath '../../data/files/smallsrcsortbucket1outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08'); diff --git a/ql/src/test/queries/clientpositive/combine2.q b/ql/src/test/queries/clientpositive/combine2.q index 615986d..376c37c 100644 --- a/ql/src/test/queries/clientpositive/combine2.q +++ b/ql/src/test/queries/clientpositive/combine2.q @@ -17,7 +17,7 @@ set hive.merge.smallfiles.avgsize=0; create table combine2(key string) partitioned by (value string); --- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +-- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) -- This test 
sets mapred.max.split.size=256 and hive.merge.smallfiles.avgsize=0 -- in an attempt to force the generation of multiple splits and multiple output files. -- However, Hadoop 0.20 is incapable of generating splits smaller than the block size diff --git a/ql/src/test/queries/clientpositive/combine2_hadoop20.q b/ql/src/test/queries/clientpositive/combine2_hadoop20.q index 9a9782a..d4b0d72 100644 --- a/ql/src/test/queries/clientpositive/combine2_hadoop20.q +++ b/ql/src/test/queries/clientpositive/combine2_hadoop20.q @@ -17,7 +17,7 @@ set hive.merge.smallfiles.avgsize=0; create table combine2(key string) partitioned by (value string); --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) -- This test sets mapred.max.split.size=256 and hive.merge.smallfiles.avgsize=0 -- in an attempt to force the generation of multiple splits and multiple output files. -- However, Hadoop 0.20 is incapable of generating splits smaller than the block size diff --git a/ql/src/test/queries/clientpositive/combine2_win.q b/ql/src/test/queries/clientpositive/combine2_win.q index f6090bb..c6b8827 100644 --- a/ql/src/test/queries/clientpositive/combine2_win.q +++ b/ql/src/test/queries/clientpositive/combine2_win.q @@ -11,7 +11,7 @@ set hive.merge.smallfiles.avgsize=0; -- INCLUDE_OS_WINDOWS -- included only on windows because of difference in file name encoding logic --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) create table combine2(key string) partitioned by (value string); diff --git a/ql/src/test/queries/clientpositive/ctas.q b/ql/src/test/queries/clientpositive/ctas.q index 71af40e..b6de233 100644 --- a/ql/src/test/queries/clientpositive/ctas.q +++ b/ql/src/test/queries/clientpositive/ctas.q @@ -1,4 +1,4 @@ --- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +-- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) create table nzhang_Tmp(a int, b string); select * from nzhang_Tmp; diff --git a/ql/src/test/queries/clientpositive/ctas_hadoop20.q b/ql/src/test/queries/clientpositive/ctas_hadoop20.q index f39689d..e275b7b 100644 --- a/ql/src/test/queries/clientpositive/ctas_hadoop20.q +++ b/ql/src/test/queries/clientpositive/ctas_hadoop20.q @@ -1,4 +1,4 @@ --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) create table nzhang_Tmp(a int, b string); select * from nzhang_Tmp; diff --git a/ql/src/test/queries/clientpositive/describe_database.q b/ql/src/test/queries/clientpositive/describe_database.q index efb052c..961bf55 100644 --- a/ql/src/test/queries/clientpositive/describe_database.q +++ b/ql/src/test/queries/clientpositive/describe_database.q @@ -1,3 +1,4 @@ create database test_db with dbproperties ('key1' = 'value1', 'key2' = 'value2'); desc database extended test_db; +desc schema extended test_db; drop database test_db; diff --git a/ql/src/test/queries/clientpositive/describe_database_json.q b/ql/src/test/queries/clientpositive/describe_database_json.q index 2f4cedc..67ca68f 100644 --- a/ql/src/test/queries/clientpositive/describe_database_json.q +++ b/ql/src/test/queries/clientpositive/describe_database_json.q @@ -6,6 +6,10 @@ DESCRIBE DATABASE jsondb1; DESCRIBE DATABASE EXTENDED jsondb1; +DESCRIBE SCHEMA jsondb1; + +DESCRIBE SCHEMA EXTENDED jsondb1; + SHOW DATABASES; SHOW DATABASES LIKE 'json*'; diff --git a/ql/src/test/queries/clientpositive/drop_partition_with_stats.q b/ql/src/test/queries/clientpositive/drop_partition_with_stats.q new file mode 100644 index 0000000..40b43c2 --- /dev/null +++ 
b/ql/src/test/queries/clientpositive/drop_partition_with_stats.q @@ -0,0 +1,68 @@ +-- This test verifies that a table partition can be dropped with column stats computed +-- The column stats for a partitioned table will go to PART_COL_STATS +CREATE DATABASE IF NOT EXISTS partstatsdb1; +USE partstatsdb1; +CREATE TABLE IF NOT EXISTS testtable (key STRING, value STRING) PARTITIONED BY (part1 STRING, Part2 STRING); +LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE testtable PARTITION (part1='p11', Part2='P12'); +LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE testtable PARTITION (part1='p21', Part2='P22'); +ANALYZE TABLE testtable COMPUTE STATISTICS FOR COLUMNS key; +ANALYZE TABLE testtable PARTITION (part1='p11', Part2='P12') COMPUTE STATISTICS FOR COLUMNS key; + + +CREATE TABLE IF NOT EXISTS TestTable1 (key STRING, value STRING) PARTITIONED BY (part1 STRING, Part2 STRING); +LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE TestTable1 PARTITION (part1='p11', Part2='P11'); +LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE TestTable1 PARTITION (part1='p11', Part2='P12'); +LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE TestTable1 PARTITION (part1='p21', Part2='P22'); +LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE TestTable1 PARTITION (part1='p31', Part2='P32'); +ANALYZE TABLE TestTable1 COMPUTE STATISTICS FOR COLUMNS key; +ANALYZE TABLE TestTable1 PARTITION (part1='p11') COMPUTE STATISTICS FOR COLUMNS key; +ANALYZE TABLE TestTable1 PARTITION (part1='p11', Part2='P12') COMPUTE STATISTICS FOR COLUMNS key; + +CREATE TABLE IF NOT EXISTS TESTTABLE2 (key STRING, value STRING) PARTITIONED BY (part1 STRING, Part2 STRING); +LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE TESTTABLE2 PARTITION (part1='p11', Part2='P12'); +LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE TESTTABLE2 PARTITION (part1='p21', Part2='P22'); +ANALYZE TABLE TESTTABLE2 COMPUTE STATISTICS FOR COLUMNS key; +ANALYZE TABLE TESTTABLE2 PARTITION (part1='p11', Part2='P12') COMPUTE STATISTICS FOR COLUMNS key; + +ALTER TABLE partstatsdb1.testtable DROP PARTITION (part1='p11', Part2='P12'); +ALTER TABLE partstatsdb1.TestTable1 DROP PARTITION (part1='p11', Part2='P12'); +ALTER TABLE partstatsdb1.TESTTABLE2 DROP PARTITION (part1='p11', Part2='P12'); + +DROP TABLE partstatsdb1.testtable; +DROP TABLE partstatsdb1.TestTable1; +DROP TABLE partstatsdb1.TESTTABLE2; +DROP DATABASE partstatsdb1; + +CREATE DATABASE IF NOT EXISTS PARTSTATSDB2; +USE PARTSTATSDB2; +CREATE TABLE IF NOT EXISTS testtable (key STRING, value STRING) PARTITIONED BY (part1 STRING, Part2 STRING); +LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE testtable PARTITION (part1='p11', Part2='P12'); +LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE testtable PARTITION (part1='p21', Part2='P22'); +ANALYZE TABLE testtable COMPUTE STATISTICS FOR COLUMNS key; +ANALYZE TABLE testtable PARTITION (part1='p11', Part2='P12') COMPUTE STATISTICS FOR COLUMNS key; + + +CREATE TABLE IF NOT EXISTS TestTable1 (key STRING, value STRING) PARTITIONED BY (part1 STRING, Part2 STRING); +LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE TestTable1 PARTITION (part1='p11', Part2='P11'); +LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE TestTable1 PARTITION (part1='p11', Part2='P12'); +LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE TestTable1 PARTITION (part1='p21', Part2='P22'); +LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE TestTable1 PARTITION (part1='p31', Part2='P32'); +ANALYZE TABLE TestTable1 COMPUTE STATISTICS FOR COLUMNS key; +ANALYZE TABLE TestTable1 PARTITION (part1='p11') COMPUTE STATISTICS FOR COLUMNS key; +ANALYZE TABLE TestTable1 PARTITION (part1='p11', Part2='P12') COMPUTE STATISTICS FOR COLUMNS key; + +CREATE TABLE IF NOT EXISTS TESTTABLE2 (key STRING, value STRING) PARTITIONED BY (part1 STRING, Part2 STRING); +LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE TESTTABLE2 PARTITION (part1='p11', Part2='P12'); +LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE TESTTABLE2 PARTITION (part1='p21', Part2='P22'); +ANALYZE TABLE TESTTABLE2 COMPUTE STATISTICS FOR COLUMNS key; +ANALYZE TABLE TESTTABLE2 PARTITION (part1='p11', Part2='P12') COMPUTE STATISTICS FOR COLUMNS key; + +ALTER TABLE PARTSTATSDB2.testtable DROP PARTITION (part1='p11', Part2='P12'); +ALTER TABLE PARTSTATSDB2.TestTable1 DROP PARTITION (part1='p11', Part2='P12'); +ALTER TABLE PARTSTATSDB2.TESTTABLE2 DROP PARTITION (part1='p11', Part2='P12'); + +DROP TABLE PARTSTATSDB2.testtable; +DROP TABLE PARTSTATSDB2.TestTable1; +DROP TABLE PARTSTATSDB2.TESTTABLE2; +DROP DATABASE PARTSTATSDB2; +
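Dropping the partitions is the point of the file above (and of drop_table_with_stats.q below): the metastore must delete the matching PART_COL_STATS rows (TAB_COL_STATS for unpartitioned tables) regardless of how the database, table, and partition-column names were cased when ANALYZE wrote them. The same drop can also be issued through the metastore API; a sketch assuming the HiveMetaStoreClient thrift wrapper:

import java.util.Arrays;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;

public class DropPartitionWithStatsSketch {
  public static void main(String[] args) throws Exception {
    HiveMetaStoreClient client = new HiveMetaStoreClient(new HiveConf());
    // Partition values are positional, matching (part1, Part2); the final
    // flag also deletes the partition's data directory. The column
    // statistics for the partition are expected to be removed by the drop.
    client.dropPartition("partstatsdb1", "testtable",
        Arrays.asList("p11", "P12"), true);
    client.close();
  }
}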
diff --git a/ql/src/test/queries/clientpositive/drop_table_with_stats.q b/ql/src/test/queries/clientpositive/drop_table_with_stats.q new file mode 100644 index 0000000..b655b53 --- /dev/null +++ b/ql/src/test/queries/clientpositive/drop_table_with_stats.q @@ -0,0 +1,43 @@ +-- This test verifies that a table can be dropped with column stats computed +-- The column stats for a table without partitions will go to TAB_COL_STATS +CREATE DATABASE IF NOT EXISTS tblstatsdb1; +USE tblstatsdb1; +CREATE TABLE IF NOT EXISTS testtable (key STRING, value STRING); +LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE testtable; +ANALYZE TABLE testtable COMPUTE STATISTICS FOR COLUMNS key; + +CREATE TABLE IF NOT EXISTS TestTable1 (key STRING, value STRING); +LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE TestTable1; +ANALYZE TABLE TestTable1 COMPUTE STATISTICS FOR COLUMNS key; + +CREATE TABLE IF NOT EXISTS TESTTABLE2 (key STRING, value STRING); +LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE TESTTABLE2; +ANALYZE TABLE TESTTABLE2 COMPUTE STATISTICS FOR COLUMNS key; + +DROP TABLE tblstatsdb1.testtable; +DROP TABLE tblstatsdb1.TestTable1; +DROP TABLE tblstatsdb1.TESTTABLE2; +DROP DATABASE tblstatsdb1; + +CREATE DATABASE IF NOT EXISTS TBLSTATSDB2; +USE TBLSTATSDB2; +CREATE TABLE IF NOT EXISTS testtable (key STRING, value STRING); +LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE testtable; +ANALYZE TABLE testtable COMPUTE STATISTICS FOR COLUMNS key; + + +CREATE TABLE IF NOT EXISTS TestTable1 (key STRING, value STRING); +LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE TestTable1; +ANALYZE TABLE TestTable1 COMPUTE STATISTICS FOR COLUMNS key; + + +CREATE TABLE IF NOT EXISTS TESTTABLE2 (key STRING, value STRING); +LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE TESTTABLE2; +ANALYZE TABLE TESTTABLE2 COMPUTE STATISTICS FOR COLUMNS key; + + +DROP TABLE TBLSTATSDB2.testtable; +DROP TABLE TBLSTATSDB2.TestTable1; +DROP TABLE TBLSTATSDB2.TESTTABLE2; +DROP DATABASE TBLSTATSDB2; + diff --git a/ql/src/test/queries/clientpositive/groupby_sort_1.q
b/ql/src/test/queries/clientpositive/groupby_sort_1.q index da7c2a2..ed888bb 100644 --- a/ql/src/test/queries/clientpositive/groupby_sort_1.q +++ b/ql/src/test/queries/clientpositive/groupby_sort_1.q @@ -3,7 +3,7 @@ set hive.enforce.sorting = true; set hive.exec.reducers.max = 10; set hive.map.groupby.sorted=true; --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) -- SORT_QUERY_RESULTS CREATE TABLE T1(key STRING, val STRING) diff --git a/ql/src/test/queries/clientpositive/groupby_sort_1_23.q b/ql/src/test/queries/clientpositive/groupby_sort_1_23.q index 8714653..a6e18c7 100644 --- a/ql/src/test/queries/clientpositive/groupby_sort_1_23.q +++ b/ql/src/test/queries/clientpositive/groupby_sort_1_23.q @@ -3,7 +3,7 @@ set hive.enforce.sorting = true; set hive.exec.reducers.max = 10; set hive.map.groupby.sorted=true; --- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) -- SORT_QUERY_RESULTS CREATE TABLE T1(key STRING, val STRING) diff --git a/ql/src/test/queries/clientpositive/groupby_sort_skew_1.q b/ql/src/test/queries/clientpositive/groupby_sort_skew_1.q index 36c67cf..76a1725 100644 --- a/ql/src/test/queries/clientpositive/groupby_sort_skew_1.q +++ b/ql/src/test/queries/clientpositive/groupby_sort_skew_1.q @@ -4,7 +4,7 @@ set hive.exec.reducers.max = 10; set hive.map.groupby.sorted=true; set hive.groupby.skewindata=true; --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) -- SORT_QUERY_RESULTS CREATE TABLE T1(key STRING, val STRING) diff --git a/ql/src/test/queries/clientpositive/groupby_sort_skew_1_23.q b/ql/src/test/queries/clientpositive/groupby_sort_skew_1_23.q index 068a675..1b24aec 100644 --- a/ql/src/test/queries/clientpositive/groupby_sort_skew_1_23.q +++ b/ql/src/test/queries/clientpositive/groupby_sort_skew_1_23.q @@ -4,7 +4,7 @@ set hive.exec.reducers.max = 10; set hive.map.groupby.sorted=true; set hive.groupby.skewindata=true; --- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) -- SORT_QUERY_RESULTS CREATE TABLE T1(key STRING, val STRING) diff --git a/ql/src/test/queries/clientpositive/input12.q b/ql/src/test/queries/clientpositive/input12.q index d4bc409..cb540db 100644 --- a/ql/src/test/queries/clientpositive/input12.q +++ b/ql/src/test/queries/clientpositive/input12.q @@ -2,7 +2,7 @@ set mapreduce.framework.name=yarn; set mapreduce.jobtracker.address=localhost:58; set hive.exec.mode.local.auto=true; --- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +-- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE; CREATE TABLE dest2(key INT, value STRING) STORED AS TEXTFILE; diff --git a/ql/src/test/queries/clientpositive/input12_hadoop20.q b/ql/src/test/queries/clientpositive/input12_hadoop20.q index 318cd37..17ee0a8 100644 --- a/ql/src/test/queries/clientpositive/input12_hadoop20.q +++ b/ql/src/test/queries/clientpositive/input12_hadoop20.q @@ -1,7 +1,7 @@ set mapred.job.tracker=localhost:58; set hive.exec.mode.local.auto=true; --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +-- INCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE; CREATE TABLE dest2(key INT, value STRING) STORED AS TEXTFILE; diff --git a/ql/src/test/queries/clientpositive/input39.q b/ql/src/test/queries/clientpositive/input39.q index 04201dd..471736b 100644 --- a/ql/src/test/queries/clientpositive/input39.q +++ b/ql/src/test/queries/clientpositive/input39.q @@ -1,4 +1,4 @@ --- 
EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +-- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) create table t1(key string, value string) partitioned by (ds string); diff --git a/ql/src/test/queries/clientpositive/join14.q b/ql/src/test/queries/clientpositive/join14.q index 638cc31..bacd446 100644 --- a/ql/src/test/queries/clientpositive/join14.q +++ b/ql/src/test/queries/clientpositive/join14.q @@ -1,5 +1,5 @@ --- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) -- SORT_QUERY_RESULTS +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE; diff --git a/ql/src/test/queries/clientpositive/loadpart_err.q b/ql/src/test/queries/clientpositive/loadpart_err.q index cc9c1fe..86d63d1 100644 --- a/ql/src/test/queries/clientpositive/loadpart_err.q +++ b/ql/src/test/queries/clientpositive/loadpart_err.q @@ -2,7 +2,7 @@ set hive.cli.errors.ignore=true; ADD FILE ../../data/scripts/error_script; --- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.17, 0.18, 0.19, 0.20, 0.20S, 0.23) +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S, 0.23) -- (this test is flaky so it is currently disabled for all Hadoop versions) CREATE TABLE loadpart1(a STRING, b STRING) PARTITIONED BY (ds STRING); diff --git a/ql/src/test/queries/clientpositive/lvj_mapjoin.q b/ql/src/test/queries/clientpositive/lvj_mapjoin.q new file mode 100644 index 0000000..4a391b4 --- /dev/null +++ b/ql/src/test/queries/clientpositive/lvj_mapjoin.q @@ -0,0 +1,37 @@ +-- SORT_QUERY_RESULTS + +drop table sour1; +drop table sour2; +drop table expod1; +drop table expod2; + +set hive.auto.convert.join=true; + +create table sour1(id int, av1 string, av2 string, av3 string) row format delimited fields terminated by ','; +create table sour2(id int, bv1 string, bv2 string, bv3 string) row format delimited fields terminated by ','; + +load data local inpath '../../data/files/sour1.txt' into table sour1; +load data local inpath '../../data/files/sour2.txt' into table sour2; + +create table expod1(aid int, av array<string>); +create table expod2(bid int, bv array<string>); + +insert overwrite table expod1 select id, array(av1,av2,av3) from sour1; +insert overwrite table expod2 select id, array(bv1,bv2,bv3) from sour2; + +explain with sub1 as +(select aid, avalue from expod1 lateral view explode(av) avs as avalue ), +sub2 as +(select bid, bvalue from expod2 lateral view explode(bv) bvs as bvalue) +select sub1.aid, sub1.avalue, sub2.bvalue +from sub1,sub2 +where sub1.aid=sub2.bid; + +with sub1 as +(select aid, avalue from expod1 lateral view explode(av) avs as avalue ), +sub2 as
+(select bid, bvalue from expod2 lateral view explode(bv) bvs as bvalue) +select sub1.aid, sub1.avalue, sub2.bvalue +from sub1,sub2 +where sub1.aid=sub2.bid; + diff --git a/ql/src/test/queries/clientpositive/merge_join_1.q b/ql/src/test/queries/clientpositive/merge_join_1.q new file mode 100644 index 0000000..8954105 --- /dev/null +++ b/ql/src/test/queries/clientpositive/merge_join_1.q @@ -0,0 +1,30 @@ +drop table if exists test_join_1; +drop table if exists test_join_2; + +create table test_join_1(a string, b string); +create table test_join_2(a string, b string); + +explain +select * from +( + SELECT a a, b b + FROM test_join_1 +)t1 + +join + +( + SELECT a a, b b + FROM test_join_1 +)t2 + on t1.a = t2.a + and t1.a = t2.b + +join + +( + select a from test_join_2 +)t3 on t1.a = t3.a; + +drop table test_join_1; +drop table test_join_2;
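The block of parquet_* tests that follows loads pre-written files, one per list layout that different writers (Hive, parquet-avro, parquet-thrift, plain unannotated groups) have produced, to check that Hive can read each of them. The layout the Parquet specification actually prescribes for lists is the three-level one; a sketch of that schema, assuming the parquet.schema parser from the pre-org.apache Parquet packaging:

import parquet.schema.MessageType;
import parquet.schema.MessageTypeParser;

public class ThreeLevelListSketch {
  public static void main(String[] args) {
    // Spec form: optional LIST group, repeated middle group, element field.
    MessageType threeLevel = MessageTypeParser.parseMessageType(
        "message m {"
        + "  optional group list_of_ints (LIST) {"
        + "    repeated group list { optional int32 element; }"
        + "  }"
        + "}");
    System.out.println(threeLevel);
  }
}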
diff --git a/ql/src/test/queries/clientpositive/parquet_array_null_element.q b/ql/src/test/queries/clientpositive/parquet_array_null_element.q new file mode 100644 index 0000000..d711d26 --- /dev/null +++ b/ql/src/test/queries/clientpositive/parquet_array_null_element.q @@ -0,0 +1,33 @@ +DROP TABLE parquet_array_null_element_staging; +DROP TABLE parquet_array_null_element; + +CREATE TABLE parquet_array_null_element_staging ( + id int, + lstint ARRAY<int>, + lststr ARRAY<string>, + mp MAP<string,string> +) ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +COLLECTION ITEMS TERMINATED BY ',' +MAP KEYS TERMINATED BY ':' +NULL DEFINED AS ''; + +CREATE TABLE parquet_array_null_element ( + id int, + lstint ARRAY<int>, + lststr ARRAY<string>, + mp MAP<string,string> +) STORED AS PARQUET; + +DESCRIBE FORMATTED parquet_array_null_element; + +LOAD DATA LOCAL INPATH '../../data/files/parquet_array_null_element.txt' OVERWRITE INTO TABLE parquet_array_null_element_staging; + +SELECT * FROM parquet_array_null_element_staging; + +INSERT OVERWRITE TABLE parquet_array_null_element SELECT * FROM parquet_array_null_element_staging; + +SELECT lstint from parquet_array_null_element; +SELECT lststr from parquet_array_null_element; +SELECT mp from parquet_array_null_element; +SELECT * FROM parquet_array_null_element; diff --git a/ql/src/test/queries/clientpositive/parquet_array_of_multi_field_struct.q b/ql/src/test/queries/clientpositive/parquet_array_of_multi_field_struct.q new file mode 100644 index 0000000..b96d302 --- /dev/null +++ b/ql/src/test/queries/clientpositive/parquet_array_of_multi_field_struct.q @@ -0,0 +1,26 @@ +-- this test creates a Parquet table with an array of multi-field structs + +CREATE TABLE parquet_array_of_multi_field_structs ( + locations ARRAY<STRUCT<latitude: DOUBLE, longitude: DOUBLE>> +) STORED AS PARQUET; + +LOAD DATA LOCAL INPATH '../../data/files/MultiFieldGroupInList.parquet' +OVERWRITE INTO TABLE parquet_array_of_multi_field_structs; + +SELECT * FROM parquet_array_of_multi_field_structs; + +DROP TABLE parquet_array_of_multi_field_structs; + +-- maps use the same writable structure, so validate that the data can be read +-- as a map instead of an array of structs + +CREATE TABLE parquet_map_view_of_multi_field_structs ( + locations MAP<DOUBLE, DOUBLE> +) STORED AS PARQUET; + +LOAD DATA LOCAL INPATH '../../data/files/MultiFieldGroupInList.parquet' +OVERWRITE INTO TABLE parquet_map_view_of_multi_field_structs; + +SELECT * FROM parquet_map_view_of_multi_field_structs; + +DROP TABLE parquet_map_view_of_multi_field_structs; diff --git a/ql/src/test/queries/clientpositive/parquet_array_of_optional_elements.q b/ql/src/test/queries/clientpositive/parquet_array_of_optional_elements.q new file mode 100644 index 0000000..1ae9cce --- /dev/null +++ b/ql/src/test/queries/clientpositive/parquet_array_of_optional_elements.q @@ -0,0 +1,12 @@ +-- this test creates a Parquet table with an array of optional structs + +CREATE TABLE parquet_array_of_optional_elements ( + locations ARRAY<STRUCT<latitude: DOUBLE, longitude: DOUBLE>> +) STORED AS PARQUET; + +LOAD DATA LOCAL INPATH '../../data/files/NewOptionalGroupInList.parquet' +OVERWRITE INTO TABLE parquet_array_of_optional_elements; + +SELECT * FROM parquet_array_of_optional_elements; + +DROP TABLE parquet_array_of_optional_elements; diff --git a/ql/src/test/queries/clientpositive/parquet_array_of_required_elements.q b/ql/src/test/queries/clientpositive/parquet_array_of_required_elements.q new file mode 100644 index 0000000..967b333 --- /dev/null +++ b/ql/src/test/queries/clientpositive/parquet_array_of_required_elements.q @@ -0,0 +1,12 @@ +-- this test creates a Parquet table with an array of structs + +CREATE TABLE parquet_array_of_required_elements ( + locations ARRAY<STRUCT<latitude: DOUBLE, longitude: DOUBLE>> +) STORED AS PARQUET; + +LOAD DATA LOCAL INPATH '../../data/files/NewRequiredGroupInList.parquet'
+OVERWRITE INTO TABLE parquet_array_of_required_elements; + +SELECT * FROM parquet_array_of_required_elements; + +DROP TABLE parquet_array_of_required_elements; diff --git a/ql/src/test/queries/clientpositive/parquet_array_of_single_field_struct.q b/ql/src/test/queries/clientpositive/parquet_array_of_single_field_struct.q new file mode 100644 index 0000000..f61fe29 --- /dev/null +++ b/ql/src/test/queries/clientpositive/parquet_array_of_single_field_struct.q @@ -0,0 +1,14 @@ +-- this test creates a Parquet table with an array of single-field structs +-- that has an ambiguous Parquet schema that is assumed to be a list of bigints +-- This verifies compliance with the spec for this case. + +CREATE TABLE parquet_ambiguous_array_of_single_field_structs ( + single_element_groups ARRAY<BIGINT> +) STORED AS PARQUET; + +LOAD DATA LOCAL INPATH '../../data/files/SingleFieldGroupInList.parquet' +OVERWRITE INTO TABLE parquet_ambiguous_array_of_single_field_structs; + +SELECT * FROM parquet_ambiguous_array_of_single_field_structs; + +DROP TABLE parquet_ambiguous_array_of_single_field_structs; diff --git a/ql/src/test/queries/clientpositive/parquet_array_of_structs.q b/ql/src/test/queries/clientpositive/parquet_array_of_structs.q new file mode 100644 index 0000000..9e8b56c --- /dev/null +++ b/ql/src/test/queries/clientpositive/parquet_array_of_structs.q @@ -0,0 +1,12 @@ +-- this test creates a Parquet table with an array of structs + +CREATE TABLE parquet_array_of_structs ( + locations ARRAY<STRUCT<latitude: DOUBLE, longitude: DOUBLE>> +) STORED AS PARQUET; + +LOAD DATA LOCAL INPATH '../../data/files/HiveRequiredGroupInList.parquet' +OVERWRITE INTO TABLE parquet_array_of_structs; + +SELECT * FROM parquet_array_of_structs; + +DROP TABLE parquet_array_of_structs; diff --git a/ql/src/test/queries/clientpositive/parquet_array_of_unannotated_groups.q b/ql/src/test/queries/clientpositive/parquet_array_of_unannotated_groups.q new file mode 100644 index 0000000..1a0698a --- /dev/null +++ b/ql/src/test/queries/clientpositive/parquet_array_of_unannotated_groups.q @@ -0,0 +1,13 @@ +-- this test creates a Parquet table from a structure with an unannotated +-- repeated structure of (x,y) structs + +CREATE TABLE parquet_array_of_unannotated_groups ( + list_of_points ARRAY<STRUCT<x: DOUBLE, y: DOUBLE>> +) STORED AS PARQUET; + +LOAD DATA LOCAL INPATH '../../data/files/UnannotatedListOfGroups.parquet' +OVERWRITE INTO TABLE parquet_array_of_unannotated_groups; + +SELECT * FROM parquet_array_of_unannotated_groups; + +DROP TABLE parquet_array_of_unannotated_groups; diff --git a/ql/src/test/queries/clientpositive/parquet_array_of_unannotated_primitives.q b/ql/src/test/queries/clientpositive/parquet_array_of_unannotated_primitives.q new file mode 100644 index 0000000..c38d9dc --- /dev/null +++ b/ql/src/test/queries/clientpositive/parquet_array_of_unannotated_primitives.q @@ -0,0 +1,13 @@ +-- this test creates a Parquet table from a structure with an unannotated +-- repeated structure of int32s + +CREATE TABLE parquet_array_of_unannotated_ints ( + list_of_ints ARRAY<INT> +) STORED AS PARQUET; + +LOAD DATA LOCAL INPATH '../../data/files/UnannotatedListOfPrimitives.parquet' +OVERWRITE INTO TABLE parquet_array_of_unannotated_ints; + +SELECT * FROM parquet_array_of_unannotated_ints; + +DROP TABLE parquet_array_of_unannotated_ints;
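The single-field-struct cases below are the ambiguous corner of that layout: a repeated group with exactly one field can be either the three-level wrapper around a primitive list or itself the element of a list of structs. These tests pin the rule down: a bare file defaults to the primitive reading, while files from parquet-avro and parquet-thrift, whose naming conventions mark the group as the element, read back as structs. A sketch of the two candidate readings, under the same parser assumption as above:

import parquet.schema.MessageType;
import parquet.schema.MessageTypeParser;

public class AmbiguousListSketch {
  public static void main(String[] args) {
    MessageType ambiguous = MessageTypeParser.parseMessageType(
        "message m {"
        + "  optional group single_element_groups (LIST) {"
        + "    repeated group single_element_group { required int64 count; }"
        + "  }"
        + "}");
    // Reading 1 (the default below): the repeated group is the wrapper,
    //   so the column is ARRAY<BIGINT>.
    // Reading 2 (avro/thrift conventions): the repeated group is the element,
    //   so the column is ARRAY<STRUCT<count: BIGINT>>.
    System.out.println(ambiguous);
  }
}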
diff --git a/ql/src/test/queries/clientpositive/parquet_avro_array_of_primitives.q b/ql/src/test/queries/clientpositive/parquet_avro_array_of_primitives.q new file mode 100644 index 0000000..9f7ad3d --- /dev/null +++ b/ql/src/test/queries/clientpositive/parquet_avro_array_of_primitives.q @@ -0,0 +1,12 @@ +-- this test creates a Parquet table with an array of primitives + +CREATE TABLE parquet_avro_array_of_primitives ( + list_of_ints ARRAY<INT> +) STORED AS PARQUET; + +LOAD DATA LOCAL INPATH '../../data/files/AvroPrimitiveInList.parquet' +OVERWRITE INTO TABLE parquet_avro_array_of_primitives; + +SELECT * FROM parquet_avro_array_of_primitives; + +DROP TABLE parquet_avro_array_of_primitives; diff --git a/ql/src/test/queries/clientpositive/parquet_avro_array_of_single_field_struct.q b/ql/src/test/queries/clientpositive/parquet_avro_array_of_single_field_struct.q new file mode 100644 index 0000000..d5fdb00 --- /dev/null +++ b/ql/src/test/queries/clientpositive/parquet_avro_array_of_single_field_struct.q @@ -0,0 +1,13 @@ +-- this test creates a Parquet table with an array of single-field structs +-- as written by parquet-avro + +CREATE TABLE parquet_avro_array_of_single_field_structs ( + single_element_groups ARRAY<STRUCT<count: BIGINT>> +) STORED AS PARQUET; + +LOAD DATA LOCAL INPATH '../../data/files/AvroSingleFieldGroupInList.parquet' +OVERWRITE INTO TABLE parquet_avro_array_of_single_field_structs; + +SELECT * FROM parquet_avro_array_of_single_field_structs; + +DROP TABLE parquet_avro_array_of_single_field_structs; diff --git a/ql/src/test/queries/clientpositive/parquet_map_null.q b/ql/src/test/queries/clientpositive/parquet_map_null.q new file mode 100644 index 0000000..d31cb99 --- /dev/null +++ b/ql/src/test/queries/clientpositive/parquet_map_null.q @@ -0,0 +1,13 @@ +-- This test attempts to write a parquet table from an avro table that contains map null values + +DROP TABLE IF EXISTS avro_table; +DROP TABLE IF EXISTS parquet_table; + +CREATE TABLE avro_table (avreau_col_1 map<string,string>) STORED AS AVRO; +LOAD DATA LOCAL INPATH '../../data/files/map_null_val.avro' OVERWRITE INTO TABLE avro_table; + +CREATE TABLE parquet_table STORED AS PARQUET AS SELECT * FROM avro_table; +SELECT * FROM parquet_table; + +DROP TABLE avro_table; +DROP TABLE parquet_table; \ No newline at end of file diff --git a/ql/src/test/queries/clientpositive/parquet_map_of_maps.q b/ql/src/test/queries/clientpositive/parquet_map_of_maps.q new file mode 100644 index 0000000..4289f37 --- /dev/null +++ b/ql/src/test/queries/clientpositive/parquet_map_of_maps.q @@ -0,0 +1,15 @@ +-- this test reads and writes a parquet file with a map of maps + +CREATE TABLE parquet_map_of_maps ( + map_of_maps MAP<STRING, MAP<STRING, INT>> +) STORED AS PARQUET; + +LOAD DATA LOCAL INPATH '../../data/files/NestedMap.parquet' +OVERWRITE INTO TABLE parquet_map_of_maps; + +CREATE TABLE parquet_map_of_maps_copy STORED AS PARQUET AS SELECT * FROM parquet_map_of_maps; + +SELECT * FROM parquet_map_of_maps_copy; + +DROP TABLE parquet_map_of_maps; +DROP TABLE parquet_map_of_maps_copy; diff --git a/ql/src/test/queries/clientpositive/parquet_nested_complex.q b/ql/src/test/queries/clientpositive/parquet_nested_complex.q new file mode 100644 index 0000000..9ada66f --- /dev/null +++ b/ql/src/test/queries/clientpositive/parquet_nested_complex.q @@ -0,0 +1,29 @@ +-- start with the original nestedcomplex test + +create table nestedcomplex ( +simple_int int, +max_nested_array array>>>>>>>>>>>>>>>>>>>>>>, +max_nested_map array>>>>>>>>>>>>>>>>>>>>>, +max_nested_struct array>>>>>>>>>>>>>>>>>>>>>>, +simple_string string) +ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +WITH SERDEPROPERTIES ( + 'hive.serialization.extend.nesting.levels'='true', + 'line.delim'='\n' +) +; + +describe nestedcomplex; +describe extended
nestedcomplex; + +load data local inpath '../../data/files/nested_complex.txt' overwrite into table nestedcomplex; + +-- and load the table into Parquet + +CREATE TABLE parquet_nested_complex STORED AS PARQUET AS SELECT * FROM nestedcomplex; + +SELECT * FROM parquet_nested_complex SORT BY simple_int; + +DROP TABLE nestedcomplex; +DROP TABLE parquet_nested_complex; diff --git a/ql/src/test/queries/clientpositive/parquet_thrift_array_of_primitives.q b/ql/src/test/queries/clientpositive/parquet_thrift_array_of_primitives.q new file mode 100644 index 0000000..38b8e6b --- /dev/null +++ b/ql/src/test/queries/clientpositive/parquet_thrift_array_of_primitives.q @@ -0,0 +1,12 @@ +-- this test creates a Parquet table with an array of primitives + +CREATE TABLE parquet_thrift_array_of_primitives ( + list_of_ints ARRAY<INT> +) STORED AS PARQUET; + +LOAD DATA LOCAL INPATH '../../data/files/ThriftPrimitiveInList.parquet' +OVERWRITE INTO TABLE parquet_thrift_array_of_primitives; + +SELECT * FROM parquet_thrift_array_of_primitives; + +DROP TABLE parquet_thrift_array_of_primitives; diff --git a/ql/src/test/queries/clientpositive/parquet_thrift_array_of_single_field_struct.q b/ql/src/test/queries/clientpositive/parquet_thrift_array_of_single_field_struct.q new file mode 100644 index 0000000..bbb8c98 --- /dev/null +++ b/ql/src/test/queries/clientpositive/parquet_thrift_array_of_single_field_struct.q @@ -0,0 +1,13 @@ +-- this test creates a Parquet table with an array of single-field structs +-- as written by parquet-thrift + +CREATE TABLE parquet_thrift_array_of_single_field_structs ( + single_element_groups ARRAY<STRUCT<count: BIGINT>> +) STORED AS PARQUET; + +LOAD DATA LOCAL INPATH '../../data/files/ThriftSingleFieldGroupInList.parquet' +OVERWRITE INTO TABLE parquet_thrift_array_of_single_field_structs; + +SELECT * FROM parquet_thrift_array_of_single_field_structs; + +DROP TABLE parquet_thrift_array_of_single_field_structs; diff --git a/ql/src/test/queries/clientpositive/sample_islocalmode_hook.q b/ql/src/test/queries/clientpositive/sample_islocalmode_hook.q index 0c8424b..5bcb344 100644 --- a/ql/src/test/queries/clientpositive/sample_islocalmode_hook.q +++ b/ql/src/test/queries/clientpositive/sample_islocalmode_hook.q @@ -8,7 +8,7 @@ set mapred.min.split.size.per.rack=300; set hive.exec.mode.local.auto=true; set hive.merge.smallfiles.avgsize=1; --- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +-- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) -- create file inputs create table sih_i_part (key int, value string) partitioned by (p string); diff --git a/ql/src/test/queries/clientpositive/sample_islocalmode_hook_hadoop20.q b/ql/src/test/queries/clientpositive/sample_islocalmode_hook_hadoop20.q index 484e1fa..7429932 100644 --- a/ql/src/test/queries/clientpositive/sample_islocalmode_hook_hadoop20.q +++ b/ql/src/test/queries/clientpositive/sample_islocalmode_hook_hadoop20.q @@ -8,7 +8,7 @@ set mapred.min.split.size.per.rack=300; set hive.exec.mode.local.auto=true; set hive.merge.smallfiles.avgsize=1; --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +-- INCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) -- This test sets mapred.max.split.size=300 and hive.merge.smallfiles.avgsize=1 -- in an attempt to force the generation of multiple splits and multiple output files.
-- However, Hadoop 0.20 is incapable of generating splits smaller than the block size diff --git a/ql/src/test/queries/clientpositive/split_sample.q b/ql/src/test/queries/clientpositive/split_sample.q deleted file mode 100644 index 952eaf7..0000000 --- a/ql/src/test/queries/clientpositive/split_sample.q +++ /dev/null @@ -1,115 +0,0 @@ -USE default; - -set hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; -set mapred.max.split.size=300; -set mapred.min.split.size=300; -set mapred.min.split.size.per.node=300; -set mapred.min.split.size.per.rack=300; -set hive.merge.smallfiles.avgsize=1; - --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) --- This test sets mapred.max.split.size=300 and hive.merge.smallfiles.avgsize=1 --- in an attempt to force the generation of multiple splits and multiple output files. --- However, Hadoop 0.20 is incapable of generating splits smaller than the block size --- when using CombineFileInputFormat, so only one split is generated. This has a --- significant impact on the results of the TABLESAMPLE(x PERCENT). This issue was --- fixed in MAPREDUCE-2046 which is included in 0.22. - --- create multiple file inputs (two enable multiple splits) -create table ss_i_part (key int, value string) partitioned by (p string); -insert overwrite table ss_i_part partition (p='1') select key, value from src; -insert overwrite table ss_i_part partition (p='2') select key, value from src; -insert overwrite table ss_i_part partition (p='3') select key, value from src; -create table ss_src2 as select key, value from ss_i_part; - -select count(1) from ss_src2 tablesample(1 percent); - --- sample first split -desc ss_src2; -set hive.sample.seednumber=0; -explain select key, value from ss_src2 tablesample(1 percent) limit 10; -select key, value from ss_src2 tablesample(1 percent) limit 10; - --- verify seed number of sampling -insert overwrite table ss_i_part partition (p='1') select key+10000, value from src; -insert overwrite table ss_i_part partition (p='2') select key+20000, value from src; -insert overwrite table ss_i_part partition (p='3') select key+30000, value from src; -create table ss_src3 as select key, value from ss_i_part; -set hive.sample.seednumber=3; -create table ss_t3 as select sum(key) % 397 as s from ss_src3 tablesample(1 percent) limit 10; -set hive.sample.seednumber=4; -create table ss_t4 as select sum(key) % 397 as s from ss_src3 tablesample(1 percent) limit 10; -set hive.sample.seednumber=5; -create table ss_t5 as select sum(key) % 397 as s from ss_src3 tablesample(1 percent) limit 10; -select sum(s) from (select s from ss_t3 union all select s from ss_t4 union all select s from ss_t5) t; - --- sample more than one split -explain select count(distinct key) from ss_src2 tablesample(70 percent) limit 10; -select count(distinct key) from ss_src2 tablesample(70 percent) limit 10; - --- sample all splits -select count(1) from ss_src2 tablesample(100 percent); - --- subquery -explain select key from (select key from ss_src2 tablesample(1 percent) limit 10) subq; -select key from (select key from ss_src2 tablesample(1 percent) limit 10) subq; - --- groupby -select key, count(1) from ss_src2 tablesample(1 percent) group by key order by key; - --- sample one of two tables: -create table ss_src1 as select * from ss_src2; -select t2.key as k from ss_src1 join ss_src2 tablesample(1 percent) t2 on ss_src1.key=t2.key order by k; - --- sample two tables -explain select * from ( -select t1.key as k1, t2.key as k from ss_src1 tablesample(80 percent) t1 full outer join 
ss_src2 tablesample(2 percent) t2 on t1.key=t2.key -) subq where k in (199, 10199, 20199) or k1 in (199, 10199, 20199); - -select * from ( -select t1.key as k1, t2.key as k from ss_src1 tablesample(80 percent) t1 full outer join ss_src2 tablesample(2 percent) t2 on t1.key=t2.key -) subq where k in (199, 10199, 20199) or k1 in (199, 10199, 20199); - --- shrink last split -explain select count(1) from ss_src2 tablesample(1 percent); -set mapred.max.split.size=300000; -set mapred.min.split.size=300000; -set mapred.min.split.size.per.node=300000; -set mapred.min.split.size.per.rack=300000; -select count(1) from ss_src2 tablesample(1 percent); -select count(1) from ss_src2 tablesample(50 percent); - ---HIVE-3401 more split samplings - --- total length -explain -select count(1) from ss_src2 tablesample(100B); -select count(1) from ss_src2 tablesample(100B); - -explain -select count(1) from ss_src2 tablesample(1K); -select count(1) from ss_src2 tablesample(1K); - --- row per split -explain -select key, value from ss_src2 tablesample(0 ROWS); -select key, value from ss_src2 tablesample(0 ROWS); - -explain -select count(1) from ss_src2 tablesample(10 ROWS); -select count(1) from ss_src2 tablesample(10 ROWS); - -explain -select count(1) from ss_src2 tablesample(100 ROWS); -select count(1) from ss_src2 tablesample(100 ROWS); - -set hive.fetch.task.conversion=more; -select key from ss_src2 tablesample(200B); -select key from ss_src2 tablesample(10 ROWS); - -set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; --- ROW type works with other input formats (others, don't) -select count(1) from ss_src2 tablesample(10 ROWS); - ---HIVE-5061 row sampling in sub-query -select * from (select * from src TABLESAMPLE (1 ROWS)) x; diff --git a/ql/src/test/queries/clientpositive/stats_partscan_1.q b/ql/src/test/queries/clientpositive/stats_partscan_1.q index cdf92e4..b790b7d 100644 --- a/ql/src/test/queries/clientpositive/stats_partscan_1.q +++ b/ql/src/test/queries/clientpositive/stats_partscan_1.q @@ -7,7 +7,7 @@ set mapred.min.split.size.per.node=256; set mapred.min.split.size.per.rack=256; set mapred.max.split.size=256; --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) -- This test uses mapred.max.split.size/mapred.max.split.size for controlling -- number of input splits, which is not effective in hive 0.20. -- stats_partscan_1_23.q is the same test with this but has different result. 
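The split_sample.q file above is deleted outright rather than updated, but the split-sampling syntax it exercised remains valid HiveQL. As a minimal sketch of the three TABLESAMPLE forms that test covered (shown against the standard src test table; these statements are illustrative and not part of the patch):

-- percent sampling: selects whole input splits until roughly n percent of the input size is covered;
-- hive.sample.seednumber changes which splits get picked
SELECT count(1) FROM src TABLESAMPLE(1 PERCENT);

-- total-length sampling (HIVE-3401): the same split-based mechanism with an absolute byte budget
SELECT count(1) FROM src TABLESAMPLE(100B);

-- row sampling (extended to subqueries by HIVE-5061): takes the first n rows of each input split,
-- and unlike the other forms also works with input formats other than CombineHiveInputFormat
SELECT count(1) FROM src TABLESAMPLE(10 ROWS);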
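Likewise, the Parquet complex-type tests added earlier in this patch (parquet_avro_array_of_primitives.q through parquet_map_of_maps.q) all follow the same load-then-scan pattern. Once such a table is loaded, the nested values are addressable with ordinary HiveQL operators; a minimal sketch, using a hypothetical table that is not part of the patch:

-- hypothetical table, for illustration only
CREATE TABLE sketch_parquet_nested (
  ids ARRAY<INT>,
  m MAP<STRING, MAP<STRING, INT>>
) STORED AS PARQUET;

-- element access, collection sizes, and nested map lookups work as for any other storage format
SELECT ids[0], size(ids), m['outer']['inner'] FROM sketch_parquet_nested;

-- the outer map can be exploded into one row per entry
SELECT k, v FROM sketch_parquet_nested LATERAL VIEW explode(m) t AS k, v;

DROP TABLE sketch_parquet_nested;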
diff --git a/ql/src/test/queries/clientpositive/uber_reduce.q b/ql/src/test/queries/clientpositive/uber_reduce.q index bcef271..34d5a12 100644 --- a/ql/src/test/queries/clientpositive/uber_reduce.q +++ b/ql/src/test/queries/clientpositive/uber_reduce.q @@ -3,7 +3,7 @@ SET mapreduce.job.ubertask.maxreduces=1; SET mapred.reduce.tasks=1; -- Uberized mode is a YARN option, ignore this test for non-YARN Hadoop versions --- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) CREATE TABLE T1(key STRING, val STRING); LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1; diff --git a/ql/src/test/queries/clientpositive/udaf_percentile_approx_20.q b/ql/src/test/queries/clientpositive/udaf_percentile_approx_20.q index 5b8ad7a..7aae8ae 100644 --- a/ql/src/test/queries/clientpositive/udaf_percentile_approx_20.q +++ b/ql/src/test/queries/clientpositive/udaf_percentile_approx_20.q @@ -1,4 +1,4 @@ --- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +-- INCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) CREATE TABLE bucket (key double, value string) CLUSTERED BY (key) SORTED BY (key DESC) INTO 4 BUCKETS STORED AS TEXTFILE; load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket; diff --git a/ql/src/test/queries/clientpositive/udtf_stack.q b/ql/src/test/queries/clientpositive/udtf_stack.q index e7cbae0..1643ab5 100644 --- a/ql/src/test/queries/clientpositive/udtf_stack.q +++ b/ql/src/test/queries/clientpositive/udtf_stack.q @@ -5,3 +5,7 @@ EXPLAIN SELECT x, y FROM src LATERAL VIEW STACK(2, 'x', array(1), 'z', array(4)) SELECT x, y FROM src LATERAL VIEW STACK(2, 'x', array(1), 'z') a AS x, y LIMIT 2; SELECT x, y FROM src LATERAL VIEW STACK(2, 'x', array(1), 'z', array(4)) a AS x, y LIMIT 2; + +EXPLAIN +SELECT stack(1, "en", "dbpedia", NULL ); +SELECT stack(1, "en", "dbpedia", NULL ); \ No newline at end of file diff --git a/ql/src/test/results/clientnegative/bucket_mapjoin_mismatch1.q.out b/ql/src/test/results/clientnegative/bucket_mapjoin_mismatch1.q.out index 4606f32..255e5bb 100644 --- a/ql/src/test/results/clientnegative/bucket_mapjoin_mismatch1.q.out +++ b/ql/src/test/results/clientnegative/bucket_mapjoin_mismatch1.q.out @@ -103,28 +103,28 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 40 Data size: 4200 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 2100 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 2100 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) TableScan - alias: a - Statistics: Num rows: 40 Data size: 4200 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 20 Data size: 2100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) 
- Statistics: Num rows: 20 Data size: 2100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Reduce Operator Tree: Join Operator diff --git a/ql/src/test/results/clientnegative/join_nonexistent_part.q.out b/ql/src/test/results/clientnegative/join_nonexistent_part.q.out index c2a85aa..a924895 100644 --- a/ql/src/test/results/clientnegative/join_nonexistent_part.q.out +++ b/ql/src/test/results/clientnegative/join_nonexistent_part.q.out @@ -1 +1 @@ -Authorization failed:No privilege 'Select' found for inputs { database:default, table:src, columnName:key}. Use SHOW GRANT to get more details. +Authorization failed:No privilege 'Select' found for inputs { database:default, table:srcpart, columnName:key}. Use SHOW GRANT to get more details. diff --git a/ql/src/test/results/clientnegative/udf_assert_true.q.out b/ql/src/test/results/clientnegative/udf_assert_true.q.out index 4a5b30d..819d723 100644 --- a/ql/src/test/results/clientnegative/udf_assert_true.q.out +++ b/ql/src/test/results/clientnegative/udf_assert_true.q.out @@ -21,10 +21,10 @@ STAGE PLANS: Lateral View Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 500 Data size: 134000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Lateral View Join Operator outputColumnNames: _col5 - Statistics: Num rows: 1000 Data size: 158000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 24000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: assert_true((_col5 > 0)) (type: void) outputColumnNames: _col0 @@ -48,7 +48,7 @@ STAGE PLANS: function name: explode Lateral View Join Operator outputColumnNames: _col5 - Statistics: Num rows: 1000 Data size: 158000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 24000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: assert_true((_col5 > 0)) (type: void) outputColumnNames: _col0 @@ -98,10 +98,10 @@ STAGE PLANS: Lateral View Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 500 Data size: 134000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Lateral View Join Operator outputColumnNames: _col5 - Statistics: Num rows: 1000 Data size: 158000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 24000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: assert_true((_col5 < 2)) (type: void) outputColumnNames: _col0 @@ -125,7 +125,7 @@ STAGE PLANS: function name: explode Lateral View Join Operator outputColumnNames: _col5 - Statistics: Num rows: 1000 Data size: 158000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 24000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: assert_true((_col5 < 2)) (type: void) outputColumnNames: _col0 diff --git a/ql/src/test/results/clientnegative/udf_assert_true2.q.out b/ql/src/test/results/clientnegative/udf_assert_true2.q.out index 3684a3f..9760d0d 100644 --- a/ql/src/test/results/clientnegative/udf_assert_true2.q.out +++ b/ql/src/test/results/clientnegative/udf_assert_true2.q.out @@ -16,10 +16,10 @@ STAGE PLANS: 
Lateral View Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 500 Data size: 134000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Lateral View Join Operator outputColumnNames: _col5 - Statistics: Num rows: 1000 Data size: 158000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 24000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (1 + assert_true((_col5 < 2))) (type: double) outputColumnNames: _col0 @@ -43,7 +43,7 @@ STAGE PLANS: function name: explode Lateral View Join Operator outputColumnNames: _col5 - Statistics: Num rows: 1000 Data size: 158000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 24000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (1 + assert_true((_col5 < 2))) (type: double) outputColumnNames: _col0 diff --git a/ql/src/test/results/clientpositive/allcolref_in_udf.q.out b/ql/src/test/results/clientpositive/allcolref_in_udf.q.out index 07aaae3..969f64b 100644 --- a/ql/src/test/results/clientpositive/allcolref_in_udf.q.out +++ b/ql/src/test/results/clientpositive/allcolref_in_udf.q.out @@ -82,18 +82,6 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: UDFToDouble(key) is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(key) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(key) (type: double) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: string), value (type: string) - TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -105,6 +93,18 @@ STAGE PLANS: Map-reduce partition columns: (key + 1) (type: double) Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: string), value (type: string) + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(key) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(key) (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reduce Operator Tree: Join Operator condition map: diff --git a/ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table2_h23.q.out b/ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table2_h23.q.out index bdff1d7..86c12c7 100644 --- a/ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table2_h23.q.out +++ b/ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table2_h23.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) +PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) -- Tests that when overwriting a partition in a table after altering the bucketing/sorting metadata -- the partition 
metadata is updated as well. @@ -6,7 +6,7 @@ CREATE TABLE tst1(key STRING, value STRING) PARTITIONED BY (ds STRING) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@tst1 -POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) +POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) -- Tests that when overwriting a partition in a table after altering the bucketing/sorting metadata -- the partition metadata is updated as well. diff --git a/ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table_h23.q.out b/ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table_h23.q.out index de3cae6..42a9796 100644 --- a/ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table_h23.q.out +++ b/ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table_h23.q.out @@ -1,9 +1,9 @@ -PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) +PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) create table tst1(key string, value string) partitioned by (ds string) clustered by (key) into 10 buckets PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@tst1 -POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) +POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) create table tst1(key string, value string) partitioned by (ds string) clustered by (key) into 10 buckets POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default diff --git a/ql/src/test/results/clientpositive/alter_table_cascade.q.out b/ql/src/test/results/clientpositive/alter_table_cascade.q.out new file mode 100644 index 0000000..0139466 --- /dev/null +++ b/ql/src/test/results/clientpositive/alter_table_cascade.q.out @@ -0,0 +1,1387 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +drop table if exists alter_table_src +PREHOOK: type: DROPTABLE +POSTHOOK: query: -- SORT_QUERY_RESULTS + +drop table if exists alter_table_src +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table if exists alter_table_cascade +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists alter_table_cascade +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table alter_table_src(c1 string, c2 string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@alter_table_src +POSTHOOK: query: create table alter_table_src(c1 string, c2 string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@alter_table_src +PREHOOK: query: load data local inpath '../../data/files/dec.txt' overwrite into table alter_table_src +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@alter_table_src +POSTHOOK: query: load data local inpath '../../data/files/dec.txt' overwrite into table alter_table_src +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@alter_table_src +PREHOOK: query: create table alter_table_cascade (c1 string) partitioned by (p1 string, p2 string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@alter_table_cascade +POSTHOOK: query: create table alter_table_cascade (c1 string) partitioned by (p1 string, p2 string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@alter_table_cascade +PREHOOK: query: insert overwrite table alter_table_cascade partition (p1, p2) + select c1, 'abc', '123' from alter_table_src + union all + select c1, null, '123' from alter_table_src +PREHOOK: type: QUERY +PREHOOK: Input: 
default@alter_table_src +PREHOOK: Output: default@alter_table_cascade +POSTHOOK: query: insert overwrite table alter_table_cascade partition (p1, p2) + select c1, 'abc', '123' from alter_table_src + union all + select c1, null, '123' from alter_table_src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alter_table_src +POSTHOOK: Output: default@alter_table_cascade@p1=__HIVE_DEFAULT_PARTITION__/p2=123 +POSTHOOK: Output: default@alter_table_cascade@p1=abc/p2=123 +POSTHOOK: Lineage: alter_table_cascade PARTITION(p1=__HIVE_DEFAULT_PARTITION__,p2=123).c1 EXPRESSION [(alter_table_src)alter_table_src.FieldSchema(name:c1, type:string, comment:null), (alter_table_src)alter_table_src.FieldSchema(name:c1, type:string, comment:null), ] +POSTHOOK: Lineage: alter_table_cascade PARTITION(p1=abc,p2=123).c1 EXPRESSION [(alter_table_src)alter_table_src.FieldSchema(name:c1, type:string, comment:null), (alter_table_src)alter_table_src.FieldSchema(name:c1, type:string, comment:null), ] +PREHOOK: query: show partitions alter_table_cascade +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@alter_table_cascade +POSTHOOK: query: show partitions alter_table_cascade +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@alter_table_cascade +p1=__HIVE_DEFAULT_PARTITION__/p2=123 +p1=abc/p2=123 +PREHOOK: query: describe alter_table_cascade +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@alter_table_cascade +POSTHOOK: query: describe alter_table_cascade +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@alter_table_cascade +c1 string +p1 string +p2 string + +# Partition Information +# col_name data_type comment + +p1 string +p2 string +PREHOOK: query: describe alter_table_cascade partition (p1='abc', p2='123') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@alter_table_cascade +POSTHOOK: query: describe alter_table_cascade partition (p1='abc', p2='123') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@alter_table_cascade +c1 string +p1 string +p2 string + +# Partition Information +# col_name data_type comment + +p1 string +p2 string +PREHOOK: query: describe alter_table_cascade partition (p1='__HIVE_DEFAULT_PARTITION__', p2='123') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@alter_table_cascade +POSTHOOK: query: describe alter_table_cascade partition (p1='__HIVE_DEFAULT_PARTITION__', p2='123') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@alter_table_cascade +c1 string +p1 string +p2 string + +# Partition Information +# col_name data_type comment + +p1 string +p2 string +PREHOOK: query: select * from alter_table_cascade where p1='abc' +PREHOOK: type: QUERY +PREHOOK: Input: default@alter_table_cascade +PREHOOK: Input: default@alter_table_cascade@p1=abc/p2=123 +#### A masked pattern was here #### +POSTHOOK: query: select * from alter_table_cascade where p1='abc' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alter_table_cascade +POSTHOOK: Input: default@alter_table_cascade@p1=abc/p2=123 +#### A masked pattern was here #### +Beck abc 123 +Beck abc 123 +Beck abc 123 +Cluck abc 123 +Mary abc 123 +Mary abc 123 +Snow abc 123 +Tom abc 123 +Tom abc 123 +Tom abc 123 +PREHOOK: query: select * from alter_table_cascade where p1='__HIVE_DEFAULT_PARTITION__' +PREHOOK: type: QUERY +PREHOOK: Input: default@alter_table_cascade +PREHOOK: Input: default@alter_table_cascade@p1=__HIVE_DEFAULT_PARTITION__/p2=123 +#### A masked pattern was here #### +POSTHOOK: query: select * from alter_table_cascade where p1='__HIVE_DEFAULT_PARTITION__' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alter_table_cascade 
+POSTHOOK: Input: default@alter_table_cascade@p1=__HIVE_DEFAULT_PARTITION__/p2=123 +#### A masked pattern was here #### +Beck __HIVE_DEFAULT_PARTITION__ 123 +Beck __HIVE_DEFAULT_PARTITION__ 123 +Beck __HIVE_DEFAULT_PARTITION__ 123 +Cluck __HIVE_DEFAULT_PARTITION__ 123 +Mary __HIVE_DEFAULT_PARTITION__ 123 +Mary __HIVE_DEFAULT_PARTITION__ 123 +Snow __HIVE_DEFAULT_PARTITION__ 123 +Tom __HIVE_DEFAULT_PARTITION__ 123 +Tom __HIVE_DEFAULT_PARTITION__ 123 +Tom __HIVE_DEFAULT_PARTITION__ 123 +PREHOOK: query: -- add columns c2 by replace columns (for HIVE-6131) +-- reload data to existing partition __HIVE_DEFAULT_PARTITION__ +-- load data to a new partition xyz +-- querying data (form new or existing partition) should return non-NULL values for the new column +alter table alter_table_cascade replace columns (c1 string, c2 string) cascade +PREHOOK: type: ALTERTABLE_REPLACECOLS +PREHOOK: Input: default@alter_table_cascade +PREHOOK: Output: default@alter_table_cascade +PREHOOK: Output: default@alter_table_cascade@p1=__HIVE_DEFAULT_PARTITION__/p2=123 +PREHOOK: Output: default@alter_table_cascade@p1=abc/p2=123 +POSTHOOK: query: -- add columns c2 by replace columns (for HIVE-6131) +-- reload data to existing partition __HIVE_DEFAULT_PARTITION__ +-- load data to a new partition xyz +-- querying data (form new or existing partition) should return non-NULL values for the new column +alter table alter_table_cascade replace columns (c1 string, c2 string) cascade +POSTHOOK: type: ALTERTABLE_REPLACECOLS +POSTHOOK: Input: default@alter_table_cascade +POSTHOOK: Output: default@alter_table_cascade +POSTHOOK: Output: default@alter_table_cascade@p1=__HIVE_DEFAULT_PARTITION__/p2=123 +POSTHOOK: Output: default@alter_table_cascade@p1=abc/p2=123 +PREHOOK: query: load data local inpath '../../data/files/dec.txt' overwrite into table alter_table_cascade partition (p1='__HIVE_DEFAULT_PARTITION__',p2='123') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@alter_table_cascade@p1=__HIVE_DEFAULT_PARTITION__/p2=123 +POSTHOOK: query: load data local inpath '../../data/files/dec.txt' overwrite into table alter_table_cascade partition (p1='__HIVE_DEFAULT_PARTITION__',p2='123') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@alter_table_cascade@p1=__HIVE_DEFAULT_PARTITION__/p2=123 +PREHOOK: query: load data local inpath '../../data/files/dec.txt' overwrite into table alter_table_cascade partition (p1='xyz', p2='123') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@alter_table_cascade +POSTHOOK: query: load data local inpath '../../data/files/dec.txt' overwrite into table alter_table_cascade partition (p1='xyz', p2='123') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@alter_table_cascade +POSTHOOK: Output: default@alter_table_cascade@p1=xyz/p2=123 +PREHOOK: query: describe alter_table_cascade +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@alter_table_cascade +POSTHOOK: query: describe alter_table_cascade +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@alter_table_cascade +c1 string +c2 string +p1 string +p2 string + +# Partition Information +# col_name data_type comment + +p1 string +p2 string +PREHOOK: query: describe alter_table_cascade partition (p1='xyz', p2='123') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@alter_table_cascade +POSTHOOK: query: describe alter_table_cascade partition (p1='xyz', p2='123') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: 
default@alter_table_cascade +c1 string +c2 string +p1 string +p2 string + +# Partition Information +# col_name data_type comment + +p1 string +p2 string +PREHOOK: query: describe alter_table_cascade partition (p1='abc', p2='123') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@alter_table_cascade +POSTHOOK: query: describe alter_table_cascade partition (p1='abc', p2='123') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@alter_table_cascade +c1 string +c2 string +p1 string +p2 string + +# Partition Information +# col_name data_type comment + +p1 string +p2 string +PREHOOK: query: describe alter_table_cascade partition (p1='__HIVE_DEFAULT_PARTITION__', p2='123') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@alter_table_cascade +POSTHOOK: query: describe alter_table_cascade partition (p1='__HIVE_DEFAULT_PARTITION__', p2='123') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@alter_table_cascade +c1 string +c2 string +p1 string +p2 string + +# Partition Information +# col_name data_type comment + +p1 string +p2 string +PREHOOK: query: select * from alter_table_cascade where p1='xyz' +PREHOOK: type: QUERY +PREHOOK: Input: default@alter_table_cascade +PREHOOK: Input: default@alter_table_cascade@p1=xyz/p2=123 +#### A masked pattern was here #### +POSTHOOK: query: select * from alter_table_cascade where p1='xyz' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alter_table_cascade +POSTHOOK: Input: default@alter_table_cascade@p1=xyz/p2=123 +#### A masked pattern was here #### +Beck 0.0 xyz 123 +Beck 77.341 xyz 123 +Beck 79.9 xyz 123 +Cluck 5.96 xyz 123 +Mary 33.33 xyz 123 +Mary 4.329 xyz 123 +Snow 55.71 xyz 123 +Tom -12.25 xyz 123 +Tom 19.00 xyz 123 +Tom 234.79 xyz 123 +PREHOOK: query: select * from alter_table_cascade where p1='abc' +PREHOOK: type: QUERY +PREHOOK: Input: default@alter_table_cascade +PREHOOK: Input: default@alter_table_cascade@p1=abc/p2=123 +#### A masked pattern was here #### +POSTHOOK: query: select * from alter_table_cascade where p1='abc' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alter_table_cascade +POSTHOOK: Input: default@alter_table_cascade@p1=abc/p2=123 +#### A masked pattern was here #### +Beck NULL abc 123 +Beck NULL abc 123 +Beck NULL abc 123 +Cluck NULL abc 123 +Mary NULL abc 123 +Mary NULL abc 123 +Snow NULL abc 123 +Tom NULL abc 123 +Tom NULL abc 123 +Tom NULL abc 123 +PREHOOK: query: select * from alter_table_cascade where p1='__HIVE_DEFAULT_PARTITION__' +PREHOOK: type: QUERY +PREHOOK: Input: default@alter_table_cascade +PREHOOK: Input: default@alter_table_cascade@p1=__HIVE_DEFAULT_PARTITION__/p2=123 +#### A masked pattern was here #### +POSTHOOK: query: select * from alter_table_cascade where p1='__HIVE_DEFAULT_PARTITION__' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alter_table_cascade +POSTHOOK: Input: default@alter_table_cascade@p1=__HIVE_DEFAULT_PARTITION__/p2=123 +#### A masked pattern was here #### +Beck 0.0 __HIVE_DEFAULT_PARTITION__ 123 +Beck 77.341 __HIVE_DEFAULT_PARTITION__ 123 +Beck 79.9 __HIVE_DEFAULT_PARTITION__ 123 +Cluck 5.96 __HIVE_DEFAULT_PARTITION__ 123 +Mary 33.33 __HIVE_DEFAULT_PARTITION__ 123 +Mary 4.329 __HIVE_DEFAULT_PARTITION__ 123 +Snow 55.71 __HIVE_DEFAULT_PARTITION__ 123 +Tom -12.25 __HIVE_DEFAULT_PARTITION__ 123 +Tom 19.00 __HIVE_DEFAULT_PARTITION__ 123 +Tom 234.79 __HIVE_DEFAULT_PARTITION__ 123 +PREHOOK: query: -- Change c2 to decimal(10,0), the change should cascaded to all partitions +-- the c2 value returned should be in decimal(10,0) +alter table alter_table_cascade change c2 c2 decimal(10,0) comment 
"change datatype" cascade +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@alter_table_cascade +PREHOOK: Output: default@alter_table_cascade +PREHOOK: Output: default@alter_table_cascade@p1=__HIVE_DEFAULT_PARTITION__/p2=123 +PREHOOK: Output: default@alter_table_cascade@p1=abc/p2=123 +PREHOOK: Output: default@alter_table_cascade@p1=xyz/p2=123 +POSTHOOK: query: -- Change c2 to decimal(10,0), the change should cascaded to all partitions +-- the c2 value returned should be in decimal(10,0) +alter table alter_table_cascade change c2 c2 decimal(10,0) comment "change datatype" cascade +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@alter_table_cascade +POSTHOOK: Output: default@alter_table_cascade +POSTHOOK: Output: default@alter_table_cascade@p1=__HIVE_DEFAULT_PARTITION__/p2=123 +POSTHOOK: Output: default@alter_table_cascade@p1=abc/p2=123 +POSTHOOK: Output: default@alter_table_cascade@p1=xyz/p2=123 +PREHOOK: query: describe alter_table_cascade +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@alter_table_cascade +POSTHOOK: query: describe alter_table_cascade +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@alter_table_cascade +c1 string +c2 decimal(10,0) change datatype +p1 string +p2 string + +# Partition Information +# col_name data_type comment + +p1 string +p2 string +PREHOOK: query: describe alter_table_cascade partition (p1='xyz', p2='123') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@alter_table_cascade +POSTHOOK: query: describe alter_table_cascade partition (p1='xyz', p2='123') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@alter_table_cascade +c1 string +c2 decimal(10,0) change datatype +p1 string +p2 string + +# Partition Information +# col_name data_type comment + +p1 string +p2 string +PREHOOK: query: describe alter_table_cascade partition (p1='abc', p2='123') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@alter_table_cascade +POSTHOOK: query: describe alter_table_cascade partition (p1='abc', p2='123') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@alter_table_cascade +c1 string +c2 decimal(10,0) change datatype +p1 string +p2 string + +# Partition Information +# col_name data_type comment + +p1 string +p2 string +PREHOOK: query: describe alter_table_cascade partition (p1='__HIVE_DEFAULT_PARTITION__', p2='123') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@alter_table_cascade +POSTHOOK: query: describe alter_table_cascade partition (p1='__HIVE_DEFAULT_PARTITION__', p2='123') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@alter_table_cascade +c1 string +c2 decimal(10,0) change datatype +p1 string +p2 string + +# Partition Information +# col_name data_type comment + +p1 string +p2 string +PREHOOK: query: select * from alter_table_cascade where p1='xyz' +PREHOOK: type: QUERY +PREHOOK: Input: default@alter_table_cascade +PREHOOK: Input: default@alter_table_cascade@p1=xyz/p2=123 +#### A masked pattern was here #### +POSTHOOK: query: select * from alter_table_cascade where p1='xyz' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alter_table_cascade +POSTHOOK: Input: default@alter_table_cascade@p1=xyz/p2=123 +#### A masked pattern was here #### +Beck 0 xyz 123 +Beck 77 xyz 123 +Beck 80 xyz 123 +Cluck 6 xyz 123 +Mary 33 xyz 123 +Mary 4 xyz 123 +Snow 56 xyz 123 +Tom -12 xyz 123 +Tom 19 xyz 123 +Tom 235 xyz 123 +PREHOOK: query: select * from alter_table_cascade where p1='abc' +PREHOOK: type: QUERY +PREHOOK: Input: default@alter_table_cascade +PREHOOK: Input: default@alter_table_cascade@p1=abc/p2=123 +#### A masked pattern was 
here #### +POSTHOOK: query: select * from alter_table_cascade where p1='abc' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alter_table_cascade +POSTHOOK: Input: default@alter_table_cascade@p1=abc/p2=123 +#### A masked pattern was here #### +Beck NULL abc 123 +Beck NULL abc 123 +Beck NULL abc 123 +Cluck NULL abc 123 +Mary NULL abc 123 +Mary NULL abc 123 +Snow NULL abc 123 +Tom NULL abc 123 +Tom NULL abc 123 +Tom NULL abc 123 +PREHOOK: query: select * from alter_table_cascade where p1='__HIVE_DEFAULT_PARTITION__' +PREHOOK: type: QUERY +PREHOOK: Input: default@alter_table_cascade +PREHOOK: Input: default@alter_table_cascade@p1=__HIVE_DEFAULT_PARTITION__/p2=123 +#### A masked pattern was here #### +POSTHOOK: query: select * from alter_table_cascade where p1='__HIVE_DEFAULT_PARTITION__' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alter_table_cascade +POSTHOOK: Input: default@alter_table_cascade@p1=__HIVE_DEFAULT_PARTITION__/p2=123 +#### A masked pattern was here #### +Beck 0 __HIVE_DEFAULT_PARTITION__ 123 +Beck 77 __HIVE_DEFAULT_PARTITION__ 123 +Beck 80 __HIVE_DEFAULT_PARTITION__ 123 +Cluck 6 __HIVE_DEFAULT_PARTITION__ 123 +Mary 33 __HIVE_DEFAULT_PARTITION__ 123 +Mary 4 __HIVE_DEFAULT_PARTITION__ 123 +Snow 56 __HIVE_DEFAULT_PARTITION__ 123 +Tom -12 __HIVE_DEFAULT_PARTITION__ 123 +Tom 19 __HIVE_DEFAULT_PARTITION__ 123 +Tom 235 __HIVE_DEFAULT_PARTITION__ 123 +PREHOOK: query: -- rename c1 to c2fromc1 and move it to after c2, the change should cascaded to all partitions +alter table alter_table_cascade change c1 c2fromc1 string comment "change position after" after c2 cascade +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@alter_table_cascade +PREHOOK: Output: default@alter_table_cascade +PREHOOK: Output: default@alter_table_cascade@p1=__HIVE_DEFAULT_PARTITION__/p2=123 +PREHOOK: Output: default@alter_table_cascade@p1=abc/p2=123 +PREHOOK: Output: default@alter_table_cascade@p1=xyz/p2=123 +POSTHOOK: query: -- rename c1 to c2fromc1 and move it to after c2, the change should cascaded to all partitions +alter table alter_table_cascade change c1 c2fromc1 string comment "change position after" after c2 cascade +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@alter_table_cascade +POSTHOOK: Output: default@alter_table_cascade +POSTHOOK: Output: default@alter_table_cascade@p1=__HIVE_DEFAULT_PARTITION__/p2=123 +POSTHOOK: Output: default@alter_table_cascade@p1=abc/p2=123 +POSTHOOK: Output: default@alter_table_cascade@p1=xyz/p2=123 +PREHOOK: query: describe alter_table_cascade +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@alter_table_cascade +POSTHOOK: query: describe alter_table_cascade +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@alter_table_cascade +c2 decimal(10,0) change datatype +c2fromc1 string change position after +p1 string +p2 string + +# Partition Information +# col_name data_type comment + +p1 string +p2 string +PREHOOK: query: describe alter_table_cascade partition (p1='xyz', p2='123') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@alter_table_cascade +POSTHOOK: query: describe alter_table_cascade partition (p1='xyz', p2='123') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@alter_table_cascade +c2 decimal(10,0) change datatype +c2fromc1 string change position after +p1 string +p2 string + +# Partition Information +# col_name data_type comment + +p1 string +p2 string +PREHOOK: query: describe alter_table_cascade partition (p1='abc', p2='123') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@alter_table_cascade +POSTHOOK: query: describe 
alter_table_cascade partition (p1='abc', p2='123') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@alter_table_cascade +c2 decimal(10,0) change datatype +c2fromc1 string change position after +p1 string +p2 string + +# Partition Information +# col_name data_type comment + +p1 string +p2 string +PREHOOK: query: describe alter_table_cascade partition (p1='__HIVE_DEFAULT_PARTITION__', p2='123') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@alter_table_cascade +POSTHOOK: query: describe alter_table_cascade partition (p1='__HIVE_DEFAULT_PARTITION__', p2='123') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@alter_table_cascade +c2 decimal(10,0) change datatype +c2fromc1 string change position after +p1 string +p2 string + +# Partition Information +# col_name data_type comment + +p1 string +p2 string +PREHOOK: query: -- rename c2fromc1 back to c1 and move to first as c1, the change should cascaded to all partitions +alter table alter_table_cascade change c2fromc1 c1 string comment "change position first" first cascade +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@alter_table_cascade +PREHOOK: Output: default@alter_table_cascade +PREHOOK: Output: default@alter_table_cascade@p1=__HIVE_DEFAULT_PARTITION__/p2=123 +PREHOOK: Output: default@alter_table_cascade@p1=abc/p2=123 +PREHOOK: Output: default@alter_table_cascade@p1=xyz/p2=123 +POSTHOOK: query: -- rename c2fromc1 back to c1 and move to first as c1, the change should cascaded to all partitions +alter table alter_table_cascade change c2fromc1 c1 string comment "change position first" first cascade +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@alter_table_cascade +POSTHOOK: Output: default@alter_table_cascade +POSTHOOK: Output: default@alter_table_cascade@p1=__HIVE_DEFAULT_PARTITION__/p2=123 +POSTHOOK: Output: default@alter_table_cascade@p1=abc/p2=123 +POSTHOOK: Output: default@alter_table_cascade@p1=xyz/p2=123 +PREHOOK: query: describe alter_table_cascade +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@alter_table_cascade +POSTHOOK: query: describe alter_table_cascade +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@alter_table_cascade +c1 string change position first +c2 decimal(10,0) change datatype +p1 string +p2 string + +# Partition Information +# col_name data_type comment + +p1 string +p2 string +PREHOOK: query: describe alter_table_cascade partition (p1='xyz', p2='123') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@alter_table_cascade +POSTHOOK: query: describe alter_table_cascade partition (p1='xyz', p2='123') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@alter_table_cascade +c1 string change position first +c2 decimal(10,0) change datatype +p1 string +p2 string + +# Partition Information +# col_name data_type comment + +p1 string +p2 string +PREHOOK: query: describe alter_table_cascade partition (p1='abc', p2='123') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@alter_table_cascade +POSTHOOK: query: describe alter_table_cascade partition (p1='abc', p2='123') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@alter_table_cascade +c1 string change position first +c2 decimal(10,0) change datatype +p1 string +p2 string + +# Partition Information +# col_name data_type comment + +p1 string +p2 string +PREHOOK: query: describe alter_table_cascade partition (p1='__HIVE_DEFAULT_PARTITION__', p2='123') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@alter_table_cascade +POSTHOOK: query: describe alter_table_cascade partition (p1='__HIVE_DEFAULT_PARTITION__', p2='123') 
+POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@alter_table_cascade +c1 string change position first +c2 decimal(10,0) change datatype +p1 string +p2 string + +# Partition Information +# col_name data_type comment + +p1 string +p2 string +PREHOOK: query: -- Try out replace columns, the change should cascaded to all partitions +alter table alter_table_cascade replace columns (c1 string) cascade +PREHOOK: type: ALTERTABLE_REPLACECOLS +PREHOOK: Input: default@alter_table_cascade +PREHOOK: Output: default@alter_table_cascade +PREHOOK: Output: default@alter_table_cascade@p1=__HIVE_DEFAULT_PARTITION__/p2=123 +PREHOOK: Output: default@alter_table_cascade@p1=abc/p2=123 +PREHOOK: Output: default@alter_table_cascade@p1=xyz/p2=123 +POSTHOOK: query: -- Try out replace columns, the change should cascaded to all partitions +alter table alter_table_cascade replace columns (c1 string) cascade +POSTHOOK: type: ALTERTABLE_REPLACECOLS +POSTHOOK: Input: default@alter_table_cascade +POSTHOOK: Output: default@alter_table_cascade +POSTHOOK: Output: default@alter_table_cascade@p1=__HIVE_DEFAULT_PARTITION__/p2=123 +POSTHOOK: Output: default@alter_table_cascade@p1=abc/p2=123 +POSTHOOK: Output: default@alter_table_cascade@p1=xyz/p2=123 +PREHOOK: query: describe alter_table_cascade +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@alter_table_cascade +POSTHOOK: query: describe alter_table_cascade +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@alter_table_cascade +c1 string +p1 string +p2 string + +# Partition Information +# col_name data_type comment + +p1 string +p2 string +PREHOOK: query: describe alter_table_cascade partition (p1='xyz', p2='123') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@alter_table_cascade +POSTHOOK: query: describe alter_table_cascade partition (p1='xyz', p2='123') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@alter_table_cascade +c1 string +p1 string +p2 string + +# Partition Information +# col_name data_type comment + +p1 string +p2 string +PREHOOK: query: describe alter_table_cascade partition (p1='abc', p2='123') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@alter_table_cascade +POSTHOOK: query: describe alter_table_cascade partition (p1='abc', p2='123') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@alter_table_cascade +c1 string +p1 string +p2 string + +# Partition Information +# col_name data_type comment + +p1 string +p2 string +PREHOOK: query: describe alter_table_cascade partition (p1='__HIVE_DEFAULT_PARTITION__', p2='123') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@alter_table_cascade +POSTHOOK: query: describe alter_table_cascade partition (p1='__HIVE_DEFAULT_PARTITION__', p2='123') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@alter_table_cascade +c1 string +p1 string +p2 string + +# Partition Information +# col_name data_type comment + +p1 string +p2 string +PREHOOK: query: select * from alter_table_cascade where p1='xyz' +PREHOOK: type: QUERY +PREHOOK: Input: default@alter_table_cascade +PREHOOK: Input: default@alter_table_cascade@p1=xyz/p2=123 +#### A masked pattern was here #### +POSTHOOK: query: select * from alter_table_cascade where p1='xyz' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alter_table_cascade +POSTHOOK: Input: default@alter_table_cascade@p1=xyz/p2=123 +#### A masked pattern was here #### +Beck xyz 123 +Beck xyz 123 +Beck xyz 123 +Cluck xyz 123 +Mary xyz 123 +Mary xyz 123 +Snow xyz 123 +Tom xyz 123 +Tom xyz 123 +Tom xyz 123 +PREHOOK: query: select * from alter_table_cascade where p1='abc' +PREHOOK: type: 
QUERY +PREHOOK: Input: default@alter_table_cascade +PREHOOK: Input: default@alter_table_cascade@p1=abc/p2=123 +#### A masked pattern was here #### +POSTHOOK: query: select * from alter_table_cascade where p1='abc' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alter_table_cascade +POSTHOOK: Input: default@alter_table_cascade@p1=abc/p2=123 +#### A masked pattern was here #### +Beck abc 123 +Beck abc 123 +Beck abc 123 +Cluck abc 123 +Mary abc 123 +Mary abc 123 +Snow abc 123 +Tom abc 123 +Tom abc 123 +Tom abc 123 +PREHOOK: query: select * from alter_table_cascade where p1='__HIVE_DEFAULT_PARTITION__' +PREHOOK: type: QUERY +PREHOOK: Input: default@alter_table_cascade +PREHOOK: Input: default@alter_table_cascade@p1=__HIVE_DEFAULT_PARTITION__/p2=123 +#### A masked pattern was here #### +POSTHOOK: query: select * from alter_table_cascade where p1='__HIVE_DEFAULT_PARTITION__' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alter_table_cascade +POSTHOOK: Input: default@alter_table_cascade@p1=__HIVE_DEFAULT_PARTITION__/p2=123 +#### A masked pattern was here #### +Beck __HIVE_DEFAULT_PARTITION__ 123 +Beck __HIVE_DEFAULT_PARTITION__ 123 +Beck __HIVE_DEFAULT_PARTITION__ 123 +Cluck __HIVE_DEFAULT_PARTITION__ 123 +Mary __HIVE_DEFAULT_PARTITION__ 123 +Mary __HIVE_DEFAULT_PARTITION__ 123 +Snow __HIVE_DEFAULT_PARTITION__ 123 +Tom __HIVE_DEFAULT_PARTITION__ 123 +Tom __HIVE_DEFAULT_PARTITION__ 123 +Tom __HIVE_DEFAULT_PARTITION__ 123 +PREHOOK: query: -- Try add columns, the change should cascaded to all partitions +alter table alter_table_cascade add columns (c2 decimal(14,4)) cascade +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@alter_table_cascade +PREHOOK: Output: default@alter_table_cascade +PREHOOK: Output: default@alter_table_cascade@p1=__HIVE_DEFAULT_PARTITION__/p2=123 +PREHOOK: Output: default@alter_table_cascade@p1=abc/p2=123 +PREHOOK: Output: default@alter_table_cascade@p1=xyz/p2=123 +POSTHOOK: query: -- Try add columns, the change should cascaded to all partitions +alter table alter_table_cascade add columns (c2 decimal(14,4)) cascade +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@alter_table_cascade +POSTHOOK: Output: default@alter_table_cascade +POSTHOOK: Output: default@alter_table_cascade@p1=__HIVE_DEFAULT_PARTITION__/p2=123 +POSTHOOK: Output: default@alter_table_cascade@p1=abc/p2=123 +POSTHOOK: Output: default@alter_table_cascade@p1=xyz/p2=123 +PREHOOK: query: describe alter_table_cascade +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@alter_table_cascade +POSTHOOK: query: describe alter_table_cascade +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@alter_table_cascade +c1 string +c2 decimal(14,4) +p1 string +p2 string + +# Partition Information +# col_name data_type comment + +p1 string +p2 string +PREHOOK: query: describe alter_table_cascade partition (p1='xyz', p2='123') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@alter_table_cascade +POSTHOOK: query: describe alter_table_cascade partition (p1='xyz', p2='123') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@alter_table_cascade +c1 string +c2 decimal(14,4) +p1 string +p2 string + +# Partition Information +# col_name data_type comment + +p1 string +p2 string +PREHOOK: query: describe alter_table_cascade partition (p1='abc', p2='123') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@alter_table_cascade +POSTHOOK: query: describe alter_table_cascade partition (p1='abc', p2='123') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@alter_table_cascade +c1 string +c2 decimal(14,4) +p1 string 
+p2 string + +# Partition Information +# col_name data_type comment + +p1 string +p2 string +PREHOOK: query: describe alter_table_cascade partition (p1='__HIVE_DEFAULT_PARTITION__', p2='123') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@alter_table_cascade +POSTHOOK: query: describe alter_table_cascade partition (p1='__HIVE_DEFAULT_PARTITION__', p2='123') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@alter_table_cascade +c1 string +c2 decimal(14,4) +p1 string +p2 string + +# Partition Information +# col_name data_type comment + +p1 string +p2 string +PREHOOK: query: select * from alter_table_cascade where p1='xyz' +PREHOOK: type: QUERY +PREHOOK: Input: default@alter_table_cascade +PREHOOK: Input: default@alter_table_cascade@p1=xyz/p2=123 +#### A masked pattern was here #### +POSTHOOK: query: select * from alter_table_cascade where p1='xyz' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alter_table_cascade +POSTHOOK: Input: default@alter_table_cascade@p1=xyz/p2=123 +#### A masked pattern was here #### +Beck 0 xyz 123 +Beck 77.341 xyz 123 +Beck 79.9 xyz 123 +Cluck 5.96 xyz 123 +Mary 33.33 xyz 123 +Mary 4.329 xyz 123 +Snow 55.71 xyz 123 +Tom -12.25 xyz 123 +Tom 19 xyz 123 +Tom 234.79 xyz 123 +PREHOOK: query: select * from alter_table_cascade where p1='abc' +PREHOOK: type: QUERY +PREHOOK: Input: default@alter_table_cascade +PREHOOK: Input: default@alter_table_cascade@p1=abc/p2=123 +#### A masked pattern was here #### +POSTHOOK: query: select * from alter_table_cascade where p1='abc' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alter_table_cascade +POSTHOOK: Input: default@alter_table_cascade@p1=abc/p2=123 +#### A masked pattern was here #### +Beck NULL abc 123 +Beck NULL abc 123 +Beck NULL abc 123 +Cluck NULL abc 123 +Mary NULL abc 123 +Mary NULL abc 123 +Snow NULL abc 123 +Tom NULL abc 123 +Tom NULL abc 123 +Tom NULL abc 123 +PREHOOK: query: select * from alter_table_cascade where p1='__HIVE_DEFAULT_PARTITION__' +PREHOOK: type: QUERY +PREHOOK: Input: default@alter_table_cascade +PREHOOK: Input: default@alter_table_cascade@p1=__HIVE_DEFAULT_PARTITION__/p2=123 +#### A masked pattern was here #### +POSTHOOK: query: select * from alter_table_cascade where p1='__HIVE_DEFAULT_PARTITION__' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alter_table_cascade +POSTHOOK: Input: default@alter_table_cascade@p1=__HIVE_DEFAULT_PARTITION__/p2=123 +#### A masked pattern was here #### +Beck 0 __HIVE_DEFAULT_PARTITION__ 123 +Beck 77.341 __HIVE_DEFAULT_PARTITION__ 123 +Beck 79.9 __HIVE_DEFAULT_PARTITION__ 123 +Cluck 5.96 __HIVE_DEFAULT_PARTITION__ 123 +Mary 33.33 __HIVE_DEFAULT_PARTITION__ 123 +Mary 4.329 __HIVE_DEFAULT_PARTITION__ 123 +Snow 55.71 __HIVE_DEFAULT_PARTITION__ 123 +Tom -12.25 __HIVE_DEFAULT_PARTITION__ 123 +Tom 19 __HIVE_DEFAULT_PARTITION__ 123 +Tom 234.79 __HIVE_DEFAULT_PARTITION__ 123 +PREHOOK: query: -- + +drop table if exists alter_table_restrict +PREHOOK: type: DROPTABLE +POSTHOOK: query: -- + +drop table if exists alter_table_restrict +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table alter_table_restrict (c1 string) partitioned by (p1 string, p2 string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@alter_table_restrict +POSTHOOK: query: create table alter_table_restrict (c1 string) partitioned by (p1 string, p2 string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@alter_table_restrict +PREHOOK: query: insert overwrite table alter_table_restrict partition (p1, p2) + select c1, 
'abc', '123' from alter_table_src + union all + select c1, null, '123' from alter_table_src +PREHOOK: type: QUERY +PREHOOK: Input: default@alter_table_src +PREHOOK: Output: default@alter_table_restrict +POSTHOOK: query: insert overwrite table alter_table_restrict partition (p1, p2) + select c1, 'abc', '123' from alter_table_src + union all + select c1, null, '123' from alter_table_src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alter_table_src +POSTHOOK: Output: default@alter_table_restrict@p1=__HIVE_DEFAULT_PARTITION__/p2=123 +POSTHOOK: Output: default@alter_table_restrict@p1=abc/p2=123 +POSTHOOK: Lineage: alter_table_restrict PARTITION(p1=__HIVE_DEFAULT_PARTITION__,p2=123).c1 EXPRESSION [(alter_table_src)alter_table_src.FieldSchema(name:c1, type:string, comment:null), (alter_table_src)alter_table_src.FieldSchema(name:c1, type:string, comment:null), ] +POSTHOOK: Lineage: alter_table_restrict PARTITION(p1=abc,p2=123).c1 EXPRESSION [(alter_table_src)alter_table_src.FieldSchema(name:c1, type:string, comment:null), (alter_table_src)alter_table_src.FieldSchema(name:c1, type:string, comment:null), ] +PREHOOK: query: show partitions alter_table_restrict +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@alter_table_restrict +POSTHOOK: query: show partitions alter_table_restrict +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@alter_table_restrict +p1=__HIVE_DEFAULT_PARTITION__/p2=123 +p1=abc/p2=123 +PREHOOK: query: describe alter_table_restrict +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@alter_table_restrict +POSTHOOK: query: describe alter_table_restrict +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@alter_table_restrict +c1 string +p1 string +p2 string + +# Partition Information +# col_name data_type comment + +p1 string +p2 string +PREHOOK: query: describe alter_table_restrict partition (p1='abc', p2='123') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@alter_table_restrict +POSTHOOK: query: describe alter_table_restrict partition (p1='abc', p2='123') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@alter_table_restrict +c1 string +p1 string +p2 string + +# Partition Information +# col_name data_type comment + +p1 string +p2 string +PREHOOK: query: describe alter_table_restrict partition (p1='__HIVE_DEFAULT_PARTITION__', p2='123') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@alter_table_restrict +POSTHOOK: query: describe alter_table_restrict partition (p1='__HIVE_DEFAULT_PARTITION__', p2='123') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@alter_table_restrict +c1 string +p1 string +p2 string + +# Partition Information +# col_name data_type comment + +p1 string +p2 string +PREHOOK: query: select * from alter_table_restrict where p1='abc' +PREHOOK: type: QUERY +PREHOOK: Input: default@alter_table_restrict +PREHOOK: Input: default@alter_table_restrict@p1=abc/p2=123 +#### A masked pattern was here #### +POSTHOOK: query: select * from alter_table_restrict where p1='abc' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alter_table_restrict +POSTHOOK: Input: default@alter_table_restrict@p1=abc/p2=123 +#### A masked pattern was here #### +Beck abc 123 +Beck abc 123 +Beck abc 123 +Cluck abc 123 +Mary abc 123 +Mary abc 123 +Snow abc 123 +Tom abc 123 +Tom abc 123 +Tom abc 123 +PREHOOK: query: select * from alter_table_restrict where p1='__HIVE_DEFAULT_PARTITION__' +PREHOOK: type: QUERY +PREHOOK: Input: default@alter_table_restrict +PREHOOK: Input: default@alter_table_restrict@p1=__HIVE_DEFAULT_PARTITION__/p2=123 +#### A masked pattern was here #### 
+POSTHOOK: query: select * from alter_table_restrict where p1='__HIVE_DEFAULT_PARTITION__' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alter_table_restrict +POSTHOOK: Input: default@alter_table_restrict@p1=__HIVE_DEFAULT_PARTITION__/p2=123 +#### A masked pattern was here #### +Beck __HIVE_DEFAULT_PARTITION__ 123 +Beck __HIVE_DEFAULT_PARTITION__ 123 +Beck __HIVE_DEFAULT_PARTITION__ 123 +Cluck __HIVE_DEFAULT_PARTITION__ 123 +Mary __HIVE_DEFAULT_PARTITION__ 123 +Mary __HIVE_DEFAULT_PARTITION__ 123 +Snow __HIVE_DEFAULT_PARTITION__ 123 +Tom __HIVE_DEFAULT_PARTITION__ 123 +Tom __HIVE_DEFAULT_PARTITION__ 123 +Tom __HIVE_DEFAULT_PARTITION__ 123 +PREHOOK: query: -- add columns c2 by replace columns (for HIVE-6131) without cascade +-- only table column definition has changed, partitions do not +-- after replace, only new partition xyz return the value to new added columns but not existing partitions abc and __HIVE_DEFAULT_PARTITION__ +alter table alter_table_restrict replace columns (c1 string, c2 string) restrict +PREHOOK: type: ALTERTABLE_REPLACECOLS +PREHOOK: Input: default@alter_table_restrict +PREHOOK: Output: default@alter_table_restrict +POSTHOOK: query: -- add columns c2 by replace columns (for HIVE-6131) without cascade +-- only table column definition has changed, partitions do not +-- after replace, only new partition xyz return the value to new added columns but not existing partitions abc and __HIVE_DEFAULT_PARTITION__ +alter table alter_table_restrict replace columns (c1 string, c2 string) restrict +POSTHOOK: type: ALTERTABLE_REPLACECOLS +POSTHOOK: Input: default@alter_table_restrict +POSTHOOK: Output: default@alter_table_restrict +PREHOOK: query: load data local inpath '../../data/files/dec.txt' overwrite into table alter_table_restrict partition (p1='abc', p2='123') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@alter_table_restrict@p1=abc/p2=123 +POSTHOOK: query: load data local inpath '../../data/files/dec.txt' overwrite into table alter_table_restrict partition (p1='abc', p2='123') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@alter_table_restrict@p1=abc/p2=123 +PREHOOK: query: load data local inpath '../../data/files/dec.txt' overwrite into table alter_table_restrict partition (p1='__HIVE_DEFAULT_PARTITION__',p2='123') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@alter_table_restrict@p1=__HIVE_DEFAULT_PARTITION__/p2=123 +POSTHOOK: query: load data local inpath '../../data/files/dec.txt' overwrite into table alter_table_restrict partition (p1='__HIVE_DEFAULT_PARTITION__',p2='123') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@alter_table_restrict@p1=__HIVE_DEFAULT_PARTITION__/p2=123 +PREHOOK: query: load data local inpath '../../data/files/dec.txt' overwrite into table alter_table_restrict partition (p1='xyz', p2='123') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@alter_table_restrict +POSTHOOK: query: load data local inpath '../../data/files/dec.txt' overwrite into table alter_table_restrict partition (p1='xyz', p2='123') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@alter_table_restrict +POSTHOOK: Output: default@alter_table_restrict@p1=xyz/p2=123 +PREHOOK: query: describe alter_table_restrict +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@alter_table_restrict +POSTHOOK: query: describe alter_table_restrict +POSTHOOK: type: DESCTABLE 
+POSTHOOK: Input: default@alter_table_restrict +c1 string +c2 string +p1 string +p2 string + +# Partition Information +# col_name data_type comment + +p1 string +p2 string +PREHOOK: query: describe alter_table_restrict partition (p1='xyz', p2='123') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@alter_table_restrict +POSTHOOK: query: describe alter_table_restrict partition (p1='xyz', p2='123') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@alter_table_restrict +c1 string +c2 string +p1 string +p2 string + +# Partition Information +# col_name data_type comment + +p1 string +p2 string +PREHOOK: query: describe alter_table_restrict partition (p1='abc', p2='123') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@alter_table_restrict +POSTHOOK: query: describe alter_table_restrict partition (p1='abc', p2='123') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@alter_table_restrict +c1 string +p1 string +p2 string + +# Partition Information +# col_name data_type comment + +p1 string +p2 string +PREHOOK: query: describe alter_table_restrict partition (p1='__HIVE_DEFAULT_PARTITION__', p2='123') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@alter_table_restrict +POSTHOOK: query: describe alter_table_restrict partition (p1='__HIVE_DEFAULT_PARTITION__', p2='123') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@alter_table_restrict +c1 string +p1 string +p2 string + +# Partition Information +# col_name data_type comment + +p1 string +p2 string +PREHOOK: query: select * from alter_table_restrict where p1='xyz' +PREHOOK: type: QUERY +PREHOOK: Input: default@alter_table_restrict +PREHOOK: Input: default@alter_table_restrict@p1=xyz/p2=123 +#### A masked pattern was here #### +POSTHOOK: query: select * from alter_table_restrict where p1='xyz' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alter_table_restrict +POSTHOOK: Input: default@alter_table_restrict@p1=xyz/p2=123 +#### A masked pattern was here #### +Beck 0.0 xyz 123 +Beck 77.341 xyz 123 +Beck 79.9 xyz 123 +Cluck 5.96 xyz 123 +Mary 33.33 xyz 123 +Mary 4.329 xyz 123 +Snow 55.71 xyz 123 +Tom -12.25 xyz 123 +Tom 19.00 xyz 123 +Tom 234.79 xyz 123 +PREHOOK: query: select * from alter_table_restrict where p1='abc' +PREHOOK: type: QUERY +PREHOOK: Input: default@alter_table_restrict +PREHOOK: Input: default@alter_table_restrict@p1=abc/p2=123 +#### A masked pattern was here #### +POSTHOOK: query: select * from alter_table_restrict where p1='abc' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alter_table_restrict +POSTHOOK: Input: default@alter_table_restrict@p1=abc/p2=123 +#### A masked pattern was here #### +Beck NULL abc 123 +Beck NULL abc 123 +Beck NULL abc 123 +Cluck NULL abc 123 +Mary NULL abc 123 +Mary NULL abc 123 +Snow NULL abc 123 +Tom NULL abc 123 +Tom NULL abc 123 +Tom NULL abc 123 +PREHOOK: query: select * from alter_table_restrict where p1='__HIVE_DEFAULT_PARTITION__' +PREHOOK: type: QUERY +PREHOOK: Input: default@alter_table_restrict +PREHOOK: Input: default@alter_table_restrict@p1=__HIVE_DEFAULT_PARTITION__/p2=123 +#### A masked pattern was here #### +POSTHOOK: query: select * from alter_table_restrict where p1='__HIVE_DEFAULT_PARTITION__' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alter_table_restrict +POSTHOOK: Input: default@alter_table_restrict@p1=__HIVE_DEFAULT_PARTITION__/p2=123 +#### A masked pattern was here #### +Beck NULL __HIVE_DEFAULT_PARTITION__ 123 +Beck NULL __HIVE_DEFAULT_PARTITION__ 123 +Beck NULL __HIVE_DEFAULT_PARTITION__ 123 +Cluck NULL __HIVE_DEFAULT_PARTITION__ 123 +Mary NULL 
__HIVE_DEFAULT_PARTITION__ 123 +Mary NULL __HIVE_DEFAULT_PARTITION__ 123 +Snow NULL __HIVE_DEFAULT_PARTITION__ 123 +Tom NULL __HIVE_DEFAULT_PARTITION__ 123 +Tom NULL __HIVE_DEFAULT_PARTITION__ 123 +Tom NULL __HIVE_DEFAULT_PARTITION__ 123 +PREHOOK: query: -- Change c2 to decimal(10,0), only limited to table and new partition +alter table alter_table_restrict change c2 c2 decimal(10,0) restrict +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@alter_table_restrict +PREHOOK: Output: default@alter_table_restrict +POSTHOOK: query: -- Change c2 to decimal(10,0), only limited to table and new partition +alter table alter_table_restrict change c2 c2 decimal(10,0) restrict +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@alter_table_restrict +POSTHOOK: Output: default@alter_table_restrict +PREHOOK: query: describe alter_table_restrict +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@alter_table_restrict +POSTHOOK: query: describe alter_table_restrict +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@alter_table_restrict +c1 string +c2 decimal(10,0) +p1 string +p2 string + +# Partition Information +# col_name data_type comment + +p1 string +p2 string +PREHOOK: query: describe alter_table_restrict partition (p1='xyz', p2='123') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@alter_table_restrict +POSTHOOK: query: describe alter_table_restrict partition (p1='xyz', p2='123') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@alter_table_restrict +c1 string +c2 string +p1 string +p2 string + +# Partition Information +# col_name data_type comment + +p1 string +p2 string +PREHOOK: query: describe alter_table_restrict partition (p1='abc', p2='123') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@alter_table_restrict +POSTHOOK: query: describe alter_table_restrict partition (p1='abc', p2='123') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@alter_table_restrict +c1 string +p1 string +p2 string + +# Partition Information +# col_name data_type comment + +p1 string +p2 string +PREHOOK: query: describe alter_table_restrict partition (p1='__HIVE_DEFAULT_PARTITION__', p2='123') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@alter_table_restrict +POSTHOOK: query: describe alter_table_restrict partition (p1='__HIVE_DEFAULT_PARTITION__', p2='123') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@alter_table_restrict +c1 string +p1 string +p2 string + +# Partition Information +# col_name data_type comment + +p1 string +p2 string +PREHOOK: query: -- Try out replace columns, only limited to table and new partition +alter table alter_table_restrict replace columns (c1 string) +PREHOOK: type: ALTERTABLE_REPLACECOLS +PREHOOK: Input: default@alter_table_restrict +PREHOOK: Output: default@alter_table_restrict +POSTHOOK: query: -- Try out replace columns, only limited to table and new partition +alter table alter_table_restrict replace columns (c1 string) +POSTHOOK: type: ALTERTABLE_REPLACECOLS +POSTHOOK: Input: default@alter_table_restrict +POSTHOOK: Output: default@alter_table_restrict +PREHOOK: query: describe alter_table_restrict +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@alter_table_restrict +POSTHOOK: query: describe alter_table_restrict +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@alter_table_restrict +c1 string +p1 string +p2 string + +# Partition Information +# col_name data_type comment + +p1 string +p2 string +PREHOOK: query: describe alter_table_restrict partition (p1='xyz', p2='123') +PREHOOK: type: DESCTABLE +PREHOOK: Input: 
default@alter_table_restrict +POSTHOOK: query: describe alter_table_restrict partition (p1='xyz', p2='123') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@alter_table_restrict +c1 string +c2 string +p1 string +p2 string + +# Partition Information +# col_name data_type comment + +p1 string +p2 string +PREHOOK: query: describe alter_table_restrict partition (p1='abc', p2='123') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@alter_table_restrict +POSTHOOK: query: describe alter_table_restrict partition (p1='abc', p2='123') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@alter_table_restrict +c1 string +p1 string +p2 string + +# Partition Information +# col_name data_type comment + +p1 string +p2 string +PREHOOK: query: describe alter_table_restrict partition (p1='__HIVE_DEFAULT_PARTITION__', p2='123') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@alter_table_restrict +POSTHOOK: query: describe alter_table_restrict partition (p1='__HIVE_DEFAULT_PARTITION__', p2='123') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@alter_table_restrict +c1 string +p1 string +p2 string + +# Partition Information +# col_name data_type comment + +p1 string +p2 string +PREHOOK: query: -- Try add columns, only limited to table and new partition +alter table alter_table_restrict add columns (c2 decimal(14,4)) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@alter_table_restrict +PREHOOK: Output: default@alter_table_restrict +POSTHOOK: query: -- Try add columns, only limited to table and new partition +alter table alter_table_restrict add columns (c2 decimal(14,4)) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@alter_table_restrict +POSTHOOK: Output: default@alter_table_restrict +PREHOOK: query: describe alter_table_restrict +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@alter_table_restrict +POSTHOOK: query: describe alter_table_restrict +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@alter_table_restrict +c1 string +c2 decimal(14,4) +p1 string +p2 string + +# Partition Information +# col_name data_type comment + +p1 string +p2 string +PREHOOK: query: describe alter_table_restrict partition (p1='xyz', p2='123') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@alter_table_restrict +POSTHOOK: query: describe alter_table_restrict partition (p1='xyz', p2='123') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@alter_table_restrict +c1 string +c2 string +p1 string +p2 string + +# Partition Information +# col_name data_type comment + +p1 string +p2 string +PREHOOK: query: describe alter_table_restrict partition (p1='abc', p2='123') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@alter_table_restrict +POSTHOOK: query: describe alter_table_restrict partition (p1='abc', p2='123') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@alter_table_restrict +c1 string +p1 string +p2 string + +# Partition Information +# col_name data_type comment + +p1 string +p2 string +PREHOOK: query: describe alter_table_restrict partition (p1='__HIVE_DEFAULT_PARTITION__', p2='123') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@alter_table_restrict +POSTHOOK: query: describe alter_table_restrict partition (p1='__HIVE_DEFAULT_PARTITION__', p2='123') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@alter_table_restrict +c1 string +p1 string +p2 string + +# Partition Information +# col_name data_type comment + +p1 string +p2 string diff --git a/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out b/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out 
index 718b43c..2640ff7 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out @@ -178,22 +178,18 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 7 Data size: 658 Basic stats: COMPLETE Column stats: PARTIAL - Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col2 (type: bigint) + Group By Operator + aggregations: min(_col1) + keys: _col0 (type: string), _col2 (type: bigint) + mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 658 Basic stats: COMPLETE Column stats: PARTIAL - Group By Operator - aggregations: min(_col1) - keys: _col0 (type: string), _col2 (type: bigint) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: PARTIAL - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce diff --git a/ql/src/test/results/clientpositive/annotate_stats_join.q.out b/ql/src/test/results/clientpositive/annotate_stats_join.q.out index c9ad41d..ee46003 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_join.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_join.q.out @@ -163,29 +163,29 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: d - Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: COMPLETE + alias: e + Statistics: Num rows: 48 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: deptid is not null (type: boolean) - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: deptid (type: int) sort order: + Map-reduce partition columns: deptid (type: int) - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: deptname (type: string) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: lastname (type: string), locid (type: int) TableScan - alias: e - Statistics: Num rows: 48 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + alias: d + Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: deptid is not null (type: boolean) - Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: deptid (type: int) sort order: + Map-reduce partition columns: deptid (type: int) - Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: lastname (type: string), locid (type: int) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + 
value expressions: deptname (type: string) Reduce Operator Tree: Join Operator condition map: @@ -230,17 +230,6 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: dept - Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (deptid is not null and deptname is not null) (type: boolean) - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: deptid (type: int), deptname (type: string) - sort order: ++ - Map-reduce partition columns: deptid (type: int), deptname (type: string) - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - TableScan alias: emp Statistics: Num rows: 48 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -252,6 +241,17 @@ STAGE PLANS: Map-reduce partition columns: deptid (type: int), lastname (type: string) Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE value expressions: locid (type: int) + TableScan + alias: dept + Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (deptid is not null and deptname is not null) (type: boolean) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: deptid (type: int), deptname (type: string) + sort order: ++ + Map-reduce partition columns: deptid (type: int), deptname (type: string) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE Reduce Operator Tree: Join Operator condition map: @@ -295,17 +295,6 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: d - Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (deptid is not null and deptname is not null) (type: boolean) - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: deptid (type: int), deptname (type: string) - sort order: ++ - Map-reduce partition columns: deptid (type: int), deptname (type: string) - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - TableScan alias: e Statistics: Num rows: 48 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -317,6 +306,17 @@ STAGE PLANS: Map-reduce partition columns: deptid (type: int), lastname (type: string) Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE value expressions: locid (type: int) + TableScan + alias: d + Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (deptid is not null and deptname is not null) (type: boolean) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: deptid (type: int), deptname (type: string) + sort order: ++ + Map-reduce partition columns: deptid (type: int), deptname (type: string) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE Reduce Operator Tree: Join Operator condition map: @@ -361,17 +361,6 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: dept - Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (deptid is not null and deptname is not null) (type: boolean) - Statistics: Num rows: 6 Data size: 
570 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: deptid (type: int), deptname (type: string), deptname (type: string) - sort order: +++ - Map-reduce partition columns: deptid (type: int), deptname (type: string), deptname (type: string) - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - TableScan alias: emp Statistics: Num rows: 48 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -383,6 +372,17 @@ STAGE PLANS: Map-reduce partition columns: deptid (type: int), lastname (type: string), lastname (type: string) Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE value expressions: locid (type: int) + TableScan + alias: dept + Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (deptid is not null and deptname is not null) (type: boolean) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: deptid (type: int), deptname (type: string), deptname (type: string) + sort order: +++ + Map-reduce partition columns: deptid (type: int), deptname (type: string), deptname (type: string) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE Reduce Operator Tree: Join Operator condition map: @@ -430,17 +430,17 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: d - Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: COMPLETE + alias: e + Statistics: Num rows: 48 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: deptid is not null (type: boolean) - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: deptid (type: int) sort order: + Map-reduce partition columns: deptid (type: int) - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: deptname (type: string) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: lastname (type: string), locid (type: int) TableScan alias: e1 Statistics: Num rows: 48 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE @@ -454,17 +454,17 @@ STAGE PLANS: Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE value expressions: lastname (type: string), locid (type: int) TableScan - alias: e - Statistics: Num rows: 48 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + alias: d + Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: deptid is not null (type: boolean) - Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: deptid (type: int) sort order: + Map-reduce partition columns: deptid (type: int) - Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: lastname (type: string), locid (type: int) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: deptname (type: string) Reduce Operator Tree: Join Operator condition map: @@ -509,29 +509,29 @@ STAGE PLANS: Map Reduce Map 
Operator Tree: TableScan - alias: d - Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: COMPLETE + alias: e + Statistics: Num rows: 48 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: deptid is not null (type: boolean) - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: deptid (type: int) sort order: + Map-reduce partition columns: deptid (type: int) - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: deptname (type: string) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: lastname (type: string), locid (type: int) TableScan - alias: e - Statistics: Num rows: 48 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + alias: d + Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: deptid is not null (type: boolean) - Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: deptid (type: int) sort order: + Map-reduce partition columns: deptid (type: int) - Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: lastname (type: string), locid (type: int) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: deptname (type: string) TableScan alias: l Statistics: Num rows: 8 Data size: 109 Basic stats: COMPLETE Column stats: COMPLETE @@ -590,17 +590,6 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: d - Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (deptid is not null and deptname is not null) (type: boolean) - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: deptid (type: int), deptname (type: string) - sort order: ++ - Map-reduce partition columns: deptid (type: int), deptname (type: string) - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - TableScan alias: e Statistics: Num rows: 48 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -613,6 +602,17 @@ STAGE PLANS: Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE value expressions: locid (type: int) TableScan + alias: d + Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (deptid is not null and deptname is not null) (type: boolean) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: deptid (type: int), deptname (type: string) + sort order: ++ + Map-reduce partition columns: deptid (type: int), deptname (type: string) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + TableScan alias: l Statistics: Num rows: 8 Data size: 109 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator diff --git a/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out b/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out index ea800a2..70c9e1d 100644 
--- a/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out @@ -578,7 +578,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: s1 + alias: s Statistics: Num rows: 12 Data size: 3143 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: s_store_sk is not null (type: boolean) @@ -589,7 +589,7 @@ STAGE PLANS: Map-reduce partition columns: s_store_sk (type: int) Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE TableScan - alias: s + alias: s1 Statistics: Num rows: 12 Data size: 3143 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: s_store_sk is not null (type: boolean) @@ -652,7 +652,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: s1 + alias: s Statistics: Num rows: 12 Data size: 3143 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (s_store_sk > 1000) (type: boolean) @@ -663,7 +663,7 @@ STAGE PLANS: Map-reduce partition columns: s_store_sk (type: int) Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE TableScan - alias: s + alias: s1 Statistics: Num rows: 12 Data size: 3143 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (s_store_sk > 1000) (type: boolean) @@ -726,27 +726,27 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: s1 + alias: s Statistics: Num rows: 12 Data size: 3143 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: s_store_sk is not null (type: boolean) - Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (s_store_sk is not null and (s_floor_space > 1000)) (type: boolean) + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: s_store_sk (type: int) sort order: + Map-reduce partition columns: s_store_sk (type: int) - Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE TableScan - alias: s + alias: s1 Statistics: Num rows: 12 Data size: 3143 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (s_store_sk is not null and (s_floor_space > 1000)) (type: boolean) - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + predicate: s_store_sk is not null (type: boolean) + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: s_store_sk (type: int) sort order: + Map-reduce partition columns: s_store_sk (type: int) - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE TableScan alias: ss Statistics: Num rows: 1000 Data size: 130523 Basic stats: COMPLETE Column stats: COMPLETE @@ -800,7 +800,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: s1 + alias: s Statistics: Num rows: 12 Data size: 3143 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: s_store_sk is not null (type: boolean) @@ -811,7 +811,7 @@ STAGE PLANS: Map-reduce partition columns: s_store_sk (type: int) Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE TableScan - alias: s + alias: s1 Statistics: Num rows: 12 Data size: 3143 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator 
predicate: s_store_sk is not null (type: boolean) @@ -866,12 +866,12 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select s.s_store_sk from store s join store_sales ss on (s.s_store_sk = ss.ss_store_sk) join customer_address ca on (ca.ca_address_sk = ss.ss_addr_sk) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -913,10 +913,17 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan + Reduce Output Operator + key expressions: _col38 (type: int) + sort order: + + Map-reduce partition columns: _col38 (type: int) + Statistics: Num rows: 916 Data size: 7328 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + TableScan alias: ca Statistics: Num rows: 20 Data size: 2114 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -927,13 +934,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: ca_address_sk (type: int) Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE - TableScan - Reduce Output Operator - key expressions: _col38 (type: int) - sort order: + - Map-reduce partition columns: _col38 (type: int) - Statistics: Num rows: 916 Data size: 7328 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int) Reduce Operator Tree: Join Operator condition map: diff --git a/ql/src/test/results/clientpositive/annotate_stats_select.q.out b/ql/src/test/results/clientpositive/annotate_stats_select.q.out index 8ef4964..8fbb208 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_select.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_select.q.out @@ -282,7 +282,7 @@ STAGE PLANS: alias: alltypes_orc Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: null (type: string) + expressions: null (type: void) outputColumnNames: _col0 Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE ListSink @@ -497,33 +497,21 @@ POSTHOOK: query: -- numRows: 2 rawDataSize: 112 explain select cast("1970-12-31 15:59:58.174" as DATE) from alltypes_orc POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: alltypes_orc - Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: null (type: void) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - ListSink + TableScan + alias: alltypes_orc + Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 
null (type: void) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + ListSink PREHOOK: query: -- numRows: 2 rawDataSize: 224 explain select cast("58.174" as DECIMAL) from alltypes_orc diff --git a/ql/src/test/results/clientpositive/annotate_stats_union.q.out b/ql/src/test/results/clientpositive/annotate_stats_union.q.out index e0e1504..919015a 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_union.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_union.q.out @@ -394,8 +394,8 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: loc_orc - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE + alias: loc_staging + Statistics: Num rows: 8 Data size: 109 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: state (type: string) outputColumnNames: _col0 @@ -414,8 +414,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TableScan - alias: loc_staging - Statistics: Num rows: 8 Data size: 109 Basic stats: COMPLETE Column stats: COMPLETE + alias: loc_orc + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: state (type: string) outputColumnNames: _col0 diff --git a/ql/src/test/results/clientpositive/archive.q.out b/ql/src/test/results/clientpositive/archive.q.out deleted file mode 100644 index 5d15bd5..0000000 --- a/ql/src/test/results/clientpositive/archive.q.out +++ /dev/null @@ -1,601 +0,0 @@ -PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) - -drop table tstsrc -PREHOOK: type: DROPTABLE -POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) - -drop table tstsrc -POSTHOOK: type: DROPTABLE -PREHOOK: query: drop table tstsrcpart -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table tstsrcpart -POSTHOOK: type: DROPTABLE -PREHOOK: query: create table tstsrc like src -PREHOOK: type: CREATETABLE -POSTHOOK: query: create table tstsrc like src -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: default@tstsrc -PREHOOK: query: insert overwrite table tstsrc select key, value from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@tstsrc -POSTHOOK: query: insert overwrite table tstsrc select key, value from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@tstsrc -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: create table tstsrcpart (key string, value string) partitioned by (ds string, hr string) clustered by (key) into 10 buckets -PREHOOK: type: CREATETABLE -POSTHOOK: query: create table tstsrcpart (key string, value string) partitioned by (ds string, hr string) clustered by (key) into 10 buckets -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: default@tstsrcpart -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-08', hr='11') -select key, value from srcpart where ds='2008-04-08' and hr='11' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Output: 
default@tstsrcpart@ds=2008-04-08/hr=11 -POSTHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-08', hr='11') -select key, value from srcpart where ds='2008-04-08' and hr='11' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=11 -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-08', hr='12') -select key, value from srcpart where ds='2008-04-08' and hr='12' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=12 -POSTHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-08', hr='12') -select key, value from srcpart where ds='2008-04-08' and hr='12' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=12 -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-09', hr='11') -select key, value from srcpart where ds='2008-04-09' and hr='11' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -PREHOOK: Output: default@tstsrcpart@ds=2008-04-09/hr=11 -POSTHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-09', hr='11') -select key, value from srcpart where ds='2008-04-09' and hr='11' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -POSTHOOK: Output: default@tstsrcpart@ds=2008-04-09/hr=11 -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE 
[(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-09', hr='12') -select key, value from srcpart where ds='2008-04-09' and hr='12' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 -PREHOOK: Output: default@tstsrcpart@ds=2008-04-09/hr=12 -POSTHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-09', hr='12') -select key, value from srcpart where ds='2008-04-09' and hr='12' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 -POSTHOOK: Output: default@tstsrcpart@ds=2008-04-09/hr=12 -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col -FROM (SELECT * FROM tstsrcpart WHERE ds='2008-04-08') subq1) subq2 -PREHOOK: type: QUERY -PREHOOK: Input: default@tstsrcpart -PREHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col -FROM (SELECT * FROM tstsrcpart WHERE ds='2008-04-08') subq1) subq2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@tstsrcpart -POSTHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart 
PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -48479881068 -PREHOOK: query: ALTER TABLE tstsrcpart ARCHIVE PARTITION (ds='2008-04-08', hr='12') -PREHOOK: type: ALTERTABLE_ARCHIVE -PREHOOK: Input: default@tstsrcpart -PREHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=12 -POSTHOOK: query: ALTER TABLE tstsrcpart ARCHIVE PARTITION (ds='2008-04-08', hr='12') -POSTHOOK: type: ALTERTABLE_ARCHIVE -POSTHOOK: Input: default@tstsrcpart -POSTHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=12 -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col -FROM (SELECT * FROM tstsrcpart WHERE ds='2008-04-08') subq1) subq2 -PREHOOK: type: QUERY -PREHOOK: Input: default@tstsrcpart -PREHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col -FROM (SELECT * FROM tstsrcpart WHERE ds='2008-04-08') subq1) subq2 -POSTHOOK: type: QUERY -POSTHOOK: 
Input: default@tstsrcpart -POSTHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -48479881068 -PREHOOK: query: SELECT key, count(1) FROM tstsrcpart WHERE ds='2008-04-08' AND hr='12' AND key='0' GROUP BY key -PREHOOK: type: QUERY -PREHOOK: Input: default@tstsrcpart -PREHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: SELECT key, count(1) FROM tstsrcpart WHERE ds='2008-04-08' AND hr='12' AND key='0' GROUP BY key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@tstsrcpart -POSTHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -0 3 -PREHOOK: query: SELECT * FROM 
tstsrcpart a JOIN tstsrc b ON a.key=b.key -WHERE a.ds='2008-04-08' AND a.hr='12' AND a.key='0' -PREHOOK: type: QUERY -PREHOOK: Input: default@tstsrc -PREHOOK: Input: default@tstsrcpart -PREHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM tstsrcpart a JOIN tstsrc b ON a.key=b.key -WHERE a.ds='2008-04-08' AND a.hr='12' AND a.key='0' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@tstsrc -POSTHOOK: Input: default@tstsrcpart -POSTHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -0 val_0 2008-04-08 12 0 val_0 -0 val_0 2008-04-08 12 0 val_0 -0 val_0 2008-04-08 12 0 val_0 -0 val_0 2008-04-08 12 0 val_0 -0 val_0 2008-04-08 12 0 val_0 -0 val_0 2008-04-08 12 0 val_0 -0 val_0 2008-04-08 12 0 val_0 -0 val_0 2008-04-08 12 0 val_0 -0 val_0 2008-04-08 12 0 val_0 -PREHOOK: query: ALTER TABLE tstsrcpart UNARCHIVE PARTITION (ds='2008-04-08', hr='12') -PREHOOK: type: ALTERTABLE_UNARCHIVE -PREHOOK: Input: default@tstsrcpart -PREHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=12 -POSTHOOK: query: ALTER TABLE tstsrcpart UNARCHIVE PARTITION (ds='2008-04-08', hr='12') -POSTHOOK: type: ALTERTABLE_UNARCHIVE -POSTHOOK: Input: default@tstsrcpart -POSTHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=12 -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart 
PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col -FROM (SELECT * FROM tstsrcpart WHERE ds='2008-04-08') subq1) subq2 -PREHOOK: type: QUERY -PREHOOK: Input: default@tstsrcpart -PREHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col -FROM (SELECT * FROM tstsrcpart WHERE ds='2008-04-08') subq1) subq2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@tstsrcpart -POSTHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12 -#### A masked pattern was here #### -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -48479881068 -PREHOOK: query: CREATE TABLE harbucket(key INT) -PARTITIONED by (ds STRING) -CLUSTERED BY (key) INTO 10 BUCKETS -PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE harbucket(key INT) -PARTITIONED by (ds STRING) -CLUSTERED BY (key) INTO 10 BUCKETS -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: default@harbucket -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: 
tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: INSERT OVERWRITE TABLE harbucket PARTITION(ds='1') SELECT CAST(key AS INT) AS a FROM tstsrc WHERE key < 50 -PREHOOK: type: QUERY -PREHOOK: Input: default@tstsrc -PREHOOK: Output: default@harbucket@ds=1 -POSTHOOK: query: INSERT OVERWRITE TABLE harbucket PARTITION(ds='1') SELECT CAST(key AS INT) AS a FROM tstsrc WHERE key < 50 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@tstsrc -POSTHOOK: Output: default@harbucket@ds=1 -POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key -PREHOOK: type: QUERY -PREHOOK: Input: default@harbucket -PREHOOK: Input: default@harbucket@ds=1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@harbucket -POSTHOOK: Input: default@harbucket@ds=1 -#### A masked pattern was here #### -POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value 
SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -0 -0 -0 -10 -20 -30 -PREHOOK: query: ALTER TABLE tstsrcpart ARCHIVE PARTITION (ds='2008-04-08', hr='12') -PREHOOK: type: ALTERTABLE_ARCHIVE -PREHOOK: Input: default@tstsrcpart -PREHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=12 -POSTHOOK: query: ALTER TABLE tstsrcpart ARCHIVE PARTITION (ds='2008-04-08', hr='12') -POSTHOOK: type: ALTERTABLE_ARCHIVE -POSTHOOK: Input: default@tstsrcpart -POSTHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=12 -POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key -PREHOOK: type: QUERY -PREHOOK: Input: default@harbucket -PREHOOK: Input: default@harbucket@ds=1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key -POSTHOOK: 
type: QUERY -POSTHOOK: Input: default@harbucket -POSTHOOK: Input: default@harbucket@ds=1 -#### A masked pattern was here #### -POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -0 -0 -0 -10 -20 -30 -PREHOOK: query: ALTER TABLE tstsrcpart UNARCHIVE PARTITION (ds='2008-04-08', hr='12') -PREHOOK: type: ALTERTABLE_UNARCHIVE -PREHOOK: Input: default@tstsrcpart -PREHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=12 -POSTHOOK: query: ALTER TABLE tstsrcpart UNARCHIVE PARTITION (ds='2008-04-08', hr='12') -POSTHOOK: type: ALTERTABLE_UNARCHIVE -POSTHOOK: Input: default@tstsrcpart -POSTHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=12 -POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value 
SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key -PREHOOK: type: QUERY -PREHOOK: Input: default@harbucket -PREHOOK: Input: default@harbucket@ds=1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@harbucket -POSTHOOK: Input: default@harbucket@ds=1 -#### A masked pattern was here #### -POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -0 -0 -0 -10 -20 -30 -PREHOOK: query: CREATE TABLE old_name(key INT) -PARTITIONED by (ds STRING) -PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE old_name(key INT) -PARTITIONED by (ds STRING) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: default@old_name -POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] 
-POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: INSERT OVERWRITE TABLE old_name PARTITION(ds='1') SELECT CAST(key AS INT) AS a FROM tstsrc WHERE key < 50 -PREHOOK: type: QUERY -PREHOOK: Input: default@tstsrc -PREHOOK: Output: default@old_name@ds=1 -POSTHOOK: query: INSERT OVERWRITE TABLE old_name PARTITION(ds='1') SELECT CAST(key AS INT) AS a FROM tstsrc WHERE key < 50 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@tstsrc -POSTHOOK: Output: default@old_name@ds=1 -POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: old_name PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: ALTER TABLE old_name ARCHIVE PARTITION (ds='1') -PREHOOK: type: ALTERTABLE_ARCHIVE -PREHOOK: Input: default@old_name -PREHOOK: Output: default@old_name@ds=1 -POSTHOOK: query: ALTER TABLE old_name ARCHIVE PARTITION (ds='1') -POSTHOOK: type: ALTERTABLE_ARCHIVE -POSTHOOK: Input: default@old_name -POSTHOOK: Output: default@old_name@ds=1 -POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: old_name PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart 
PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col -FROM (SELECT * FROM old_name WHERE ds='1') subq1) subq2 -PREHOOK: type: QUERY -PREHOOK: Input: default@old_name -PREHOOK: Input: default@old_name@ds=1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col -FROM (SELECT * FROM old_name WHERE ds='1') subq1) subq2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@old_name -POSTHOOK: Input: default@old_name@ds=1 -#### A masked pattern was here #### -POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: old_name PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -48656137 -PREHOOK: query: ALTER TABLE old_name RENAME TO new_name -PREHOOK: type: ALTERTABLE_RENAME -PREHOOK: Input: default@old_name -PREHOOK: Output: default@old_name -POSTHOOK: query: ALTER TABLE old_name RENAME TO new_name -POSTHOOK: type: ALTERTABLE_RENAME -POSTHOOK: Input: default@old_name -POSTHOOK: Output: default@new_name -POSTHOOK: Output: default@old_name -POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, 
type:string, comment:default), ] -POSTHOOK: Lineage: old_name PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col -FROM (SELECT * FROM new_name WHERE ds='1') subq1) subq2 -PREHOOK: type: QUERY -PREHOOK: Input: default@new_name -PREHOOK: Input: default@new_name@ds=1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col -FROM (SELECT * FROM new_name WHERE ds='1') subq1) subq2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@new_name -POSTHOOK: Input: default@new_name@ds=1 -#### A masked pattern was here #### -POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: old_name PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key 
SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -NULL -PREHOOK: query: drop table tstsrc -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@tstsrc -PREHOOK: Output: default@tstsrc -POSTHOOK: query: drop table tstsrc -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@tstsrc -POSTHOOK: Output: default@tstsrc -POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: old_name PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: drop table tstsrcpart -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@tstsrcpart -PREHOOK: Output: default@tstsrcpart -POSTHOOK: query: drop table tstsrcpart -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@tstsrcpart -POSTHOOK: Output: default@tstsrcpart -POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: old_name PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart 
PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] diff --git a/ql/src/test/results/clientpositive/archive_corrupt.q.out b/ql/src/test/results/clientpositive/archive_corrupt.q.out deleted file mode 100644 index 8aba24e..0000000 --- a/ql/src/test/results/clientpositive/archive_corrupt.q.out +++ /dev/null @@ -1,158 +0,0 @@ -PREHOOK: query: USE default -PREHOOK: type: SWITCHDATABASE -POSTHOOK: query: USE default -POSTHOOK: type: SWITCHDATABASE -PREHOOK: query: drop table tstsrcpart -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table tstsrcpart -POSTHOOK: type: DROPTABLE -PREHOOK: query: create table tstsrcpart like srcpart -PREHOOK: type: CREATETABLE -POSTHOOK: query: create table tstsrcpart like srcpart -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: default@tstsrcpart -PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) --- The version of GzipCodec provided in Hadoop 0.20 silently ignores --- file format errors. However, versions of Hadoop that include --- HADOOP-6835 (e.g. 0.23 and 1.x) cause a Wrong File Format exception --- to be thrown during the LOAD step. This behavior is now tested in --- clientnegative/archive_corrupt.q - -load data local inpath '../../data/files/archive_corrupt.rc' overwrite into table tstsrcpart partition (ds='2008-04-08', hr='11') -PREHOOK: type: LOAD -PREHOOK: Output: default@tstsrcpart -POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) --- The version of GzipCodec provided in Hadoop 0.20 silently ignores --- file format errors. However, versions of Hadoop that include --- HADOOP-6835 (e.g. 0.23 and 1.x) cause a Wrong File Format exception --- to be thrown during the LOAD step. 
This behavior is now tested in --- clientnegative/archive_corrupt.q - -load data local inpath '../../data/files/archive_corrupt.rc' overwrite into table tstsrcpart partition (ds='2008-04-08', hr='11') -POSTHOOK: type: LOAD -POSTHOOK: Output: default@tstsrcpart -POSTHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=11 -PREHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-08', hr='12') -select key, value from srcpart where ds='2008-04-08' and hr='12' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=12 -POSTHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-08', hr='12') -select key, value from srcpart where ds='2008-04-08' and hr='12' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=12 -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-09', hr='11') -select key, value from srcpart where ds='2008-04-09' and hr='11' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -PREHOOK: Output: default@tstsrcpart@ds=2008-04-09/hr=11 -POSTHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-09', hr='11') -select key, value from srcpart where ds='2008-04-09' and hr='11' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -POSTHOOK: Output: default@tstsrcpart@ds=2008-04-09/hr=11 -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-09', hr='12') -select key, value from srcpart where ds='2008-04-09' and hr='12' -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 -PREHOOK: Output: default@tstsrcpart@ds=2008-04-09/hr=12 -POSTHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-09', hr='12') -select key, value from srcpart where ds='2008-04-09' and hr='12' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 -POSTHOOK: Output: default@tstsrcpart@ds=2008-04-09/hr=12 -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, 
type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: describe extended tstsrcpart partition (ds='2008-04-08', hr='11') -PREHOOK: type: DESCTABLE -POSTHOOK: query: describe extended tstsrcpart partition (ds='2008-04-08', hr='11') -POSTHOOK: type: DESCTABLE -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -# col_name data_type comment - -key string default -value string default -ds string None -hr string None - -# Partition Information -# col_name data_type comment - -ds string None -hr string None - -#### A masked pattern was here #### -PREHOOK: query: alter table tstsrcpart archive partition (ds='2008-04-08', hr='11') -PREHOOK: type: ALTERTABLE_ARCHIVE -PREHOOK: Input: default@tstsrcpart -PREHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=11 -POSTHOOK: query: alter table tstsrcpart archive partition (ds='2008-04-08', hr='11') -POSTHOOK: type: ALTERTABLE_ARCHIVE -POSTHOOK: Input: default@tstsrcpart -POSTHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=11 -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: describe extended tstsrcpart partition (ds='2008-04-08', hr='11') -PREHOOK: type: DESCTABLE -POSTHOOK: query: describe extended tstsrcpart partition (ds='2008-04-08', hr='11') -POSTHOOK: type: DESCTABLE -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart 
PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -# col_name data_type comment - -key string default -value string default -ds string None -hr string None - -# Partition Information -# col_name data_type comment - -ds string None -hr string None - -#### A masked pattern was here #### -PREHOOK: query: alter table tstsrcpart unarchive partition (ds='2008-04-08', hr='11') -PREHOOK: type: ALTERTABLE_UNARCHIVE -PREHOOK: Input: default@tstsrcpart -PREHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=11 -POSTHOOK: query: alter table tstsrcpart unarchive partition (ds='2008-04-08', hr='11') -POSTHOOK: type: ALTERTABLE_UNARCHIVE -POSTHOOK: Input: default@tstsrcpart -POSTHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=11 -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] diff --git a/ql/src/test/results/clientpositive/archive_excludeHadoop20.q.out b/ql/src/test/results/clientpositive/archive_excludeHadoop20.q.out index 7ed53cb..c2b9872 100644 --- a/ql/src/test/results/clientpositive/archive_excludeHadoop20.q.out +++ b/ql/src/test/results/clientpositive/archive_excludeHadoop20.q.out @@ -1,10 +1,6 @@ -PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20) - -drop table tstsrc +PREHOOK: query: drop table tstsrc PREHOOK: type: DROPTABLE -POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20) - -drop table tstsrc +POSTHOOK: query: drop table tstsrc POSTHOOK: type: DROPTABLE PREHOOK: query: drop table tstsrcpart PREHOOK: type: DROPTABLE diff --git a/ql/src/test/results/clientpositive/authorization_explain.q.out b/ql/src/test/results/clientpositive/authorization_explain.q.out index ba73021..3d97227 100644 --- a/ql/src/test/results/clientpositive/authorization_explain.q.out +++ b/ql/src/test/results/clientpositive/authorization_explain.q.out @@ -4,8 +4,8 @@ PREHOOK: type: QUERY POSTHOOK: query: explain authorization select * from src join srcpart POSTHOOK: type: QUERY INPUTS: - default@srcpart default@src + default@srcpart default@srcpart@ds=2008-04-08/hr=11 default@srcpart@ds=2008-04-08/hr=12 default@srcpart@ds=2008-04-09/hr=11 @@ 
-17,8 +17,8 @@ CURRENT_USER: OPERATION: QUERY AUTHORIZATION_FAILURES: - No privilege 'Select' found for inputs { database:default, table:srcpart, columnName:key} No privilege 'Select' found for inputs { database:default, table:src, columnName:key} + No privilege 'Select' found for inputs { database:default, table:srcpart, columnName:key} Warning: Shuffle Join JOIN[4][tables = [src, srcpart]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain formatted authorization select * from src join srcpart PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/auto_join0.q.out b/ql/src/test/results/clientpositive/auto_join0.q.out index 9261ce0..cf384e9 100644 --- a/ql/src/test/results/clientpositive/auto_join0.q.out +++ b/ql/src/test/results/clientpositive/auto_join0.q.out @@ -78,14 +78,10 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + sort order: ++++ Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - sort order: ++++ - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/auto_join10.q.out b/ql/src/test/results/clientpositive/auto_join10.q.out index 3d2bcc2..7fb3070 100644 --- a/ql/src/test/results/clientpositive/auto_join10.q.out +++ b/ql/src/test/results/clientpositive/auto_join10.q.out @@ -70,19 +70,15 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col2, _col3 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col3 (type: string) - outputColumnNames: _col2, _col3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/auto_join11.q.out b/ql/src/test/results/clientpositive/auto_join11.q.out index 8dbad67..98c8285 100644 --- a/ql/src/test/results/clientpositive/auto_join11.q.out +++ b/ql/src/test/results/clientpositive/auto_join11.q.out @@ -70,19 +70,15 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col3 Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col3 - Statistics: Num rows: 182 Data size: 1939 Basic 
stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0,_col3)) - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: sum(hash(_col0,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/auto_join12.q.out b/ql/src/test/results/clientpositive/auto_join12.q.out index 037116c..f116e23 100644 --- a/ql/src/test/results/clientpositive/auto_join12.q.out +++ b/ql/src/test/results/clientpositive/auto_join12.q.out @@ -104,19 +104,15 @@ STAGE PLANS: 2 _col0 (type: string) outputColumnNames: _col0, _col3 Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col3 - Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0,_col3)) - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: sum(hash(_col0,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/auto_join13.q.out b/ql/src/test/results/clientpositive/auto_join13.q.out index 0cb9b4f..3396a0c 100644 --- a/ql/src/test/results/clientpositive/auto_join13.q.out +++ b/ql/src/test/results/clientpositive/auto_join13.q.out @@ -111,19 +111,15 @@ STAGE PLANS: 1 UDFToDouble(_col0) (type: double) outputColumnNames: _col0, _col3 Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col3 - Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0,_col3)) - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: sum(hash(_col0,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/auto_join14.q.out b/ql/src/test/results/clientpositive/auto_join14.q.out index 450a961..55c9b5d 100644 --- a/ql/src/test/results/clientpositive/auto_join14.q.out +++ b/ql/src/test/results/clientpositive/auto_join14.q.out 
@@ -1,10 +1,10 @@ -PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@dest1 -POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE diff --git a/ql/src/test/results/clientpositive/auto_join16.q.out b/ql/src/test/results/clientpositive/auto_join16.q.out index f96bae3..bd5b378 100644 --- a/ql/src/test/results/clientpositive/auto_join16.q.out +++ b/ql/src/test/results/clientpositive/auto_join16.q.out @@ -66,19 +66,15 @@ STAGE PLANS: 1 key (type: string), value (type: string) outputColumnNames: _col0, _col3 Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col3 - Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0,_col3)) - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: sum(hash(_col0,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/auto_join18.q.out b/ql/src/test/results/clientpositive/auto_join18.q.out index 0de3f2a..2303f18 100644 --- a/ql/src/test/results/clientpositive/auto_join18.q.out +++ b/ql/src/test/results/clientpositive/auto_join18.q.out @@ -36,40 +36,37 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: src2 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: key, value - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(DISTINCT value) - keys: key (type: string), value (type: string) + aggregations: count(value) + keys: key (type: string) mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ + key expressions: _col0 (type: string) + sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: count(DISTINCT KEY._col1:0._col0) + aggregations: 
count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -79,14 +76,14 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) TableScan Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reduce Operator Tree: Join Operator @@ -96,22 +93,18 @@ STAGE PLANS: 0 {KEY.reducesinkkey0} {VALUE._col0} 1 {KEY.reducesinkkey0} {VALUE._col0} outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0,_col1,_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col0,_col1,_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Map Reduce @@ -143,41 +136,36 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: src1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: src2 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value 
(type: string)
              outputColumnNames: key, value
-             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
-               aggregations: count(value)
-               keys: key (type: string)
+               aggregations: count(DISTINCT value)
+               keys: key (type: string), value (type: string)
                mode: hash
-               outputColumnNames: _col0, _col1
-               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+               outputColumnNames: _col0, _col1, _col2
+               Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
-                 key expressions: _col0 (type: string)
-                 sort order: +
+                 key expressions: _col0 (type: string), _col1 (type: string)
+                 sort order: ++
                  Map-reduce partition columns: _col0 (type: string)
-                 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                 value expressions: _col1 (type: bigint)
+                 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
       Reduce Operator Tree:
         Group By Operator
-          aggregations: count(VALUE._col0)
+          aggregations: count(DISTINCT KEY._col1:0._col0)
           keys: KEY._col0 (type: string)
           mode: mergepartial
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: string), _col1 (type: bigint)
-            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Fetch Operator
diff --git a/ql/src/test/results/clientpositive/auto_join18_multi_distinct.q.out b/ql/src/test/results/clientpositive/auto_join18_multi_distinct.q.out
index 46559a7..ee5a32c 100644
--- a/ql/src/test/results/clientpositive/auto_join18_multi_distinct.q.out
+++ b/ql/src/test/results/clientpositive/auto_join18_multi_distinct.q.out
@@ -38,40 +38,37 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
           TableScan
-            alias: src2
-            Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+            alias: src1
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: key (type: string), value (type: string)
               outputColumnNames: key, value
-              Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
-               aggregations: count(DISTINCT value), count(DISTINCT key)
-               keys: key (type: string), value (type: string)
+               aggregations: count(value)
+               keys: key (type: string)
                mode: hash
-               outputColumnNames: _col0, _col1, _col2, _col3
-               Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+               outputColumnNames: _col0, _col1
+               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
-                 key expressions: _col0 (type: string), _col1 (type: string)
-                 sort order: ++
+                 key expressions: _col0 (type: string)
+                 sort order: +
                  Map-reduce partition columns: _col0 (type: string)
-                 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                 value expressions: _col1 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
-          aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0)
+          aggregations: count(VALUE._col0)
           keys: KEY._col0 (type: string)
           mode: mergepartial
-          outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: bigint)
-            outputColumnNames: _col0, _col1, _col2
-            Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-2
     Map Reduce
@@ -81,15 +78,15 @@ STAGE PLANS:
           key expressions: _col0 (type: string)
           sort order: +
           Map-reduce partition columns: _col0 (type: string)
-          Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE
-          value expressions: _col1 (type: bigint), _col2 (type: bigint)
+          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+          value expressions: _col1 (type: bigint)
         TableScan
           Reduce Output Operator
             key expressions: _col0 (type: string)
             sort order: +
             Map-reduce partition columns: _col0 (type: string)
-            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-            value expressions: _col1 (type: bigint)
+            Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+            value expressions: _col1 (type: bigint), _col2 (type: bigint)
       Reduce Operator Tree:
         Join Operator
           condition map:
@@ -98,22 +95,18 @@ STAGE PLANS:
             0 {KEY.reducesinkkey0} {VALUE._col0}
             1 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1}
           outputColumnNames: _col0, _col1, _col2, _col3, _col4
-          Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint), _col4 (type: bigint)
-            outputColumnNames: _col0, _col1, _col2, _col3, _col4
-            Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-            Group By Operator
-              aggregations: sum(hash(_col0,_col1,_col2,_col3,_col4))
-              mode: hash
-              outputColumnNames: _col0
-              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-              File Output Operator
-                compressed: false
-                table:
-                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+          Group By Operator
+            aggregations: sum(hash(_col0,_col1,_col2,_col3,_col4))
+            mode: hash
+            outputColumnNames: _col0
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-3
     Map Reduce
@@ -145,41 +138,36 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
           TableScan
-            alias: src1
-            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            alias: src2
+            Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: key (type: string), value (type: string)
               outputColumnNames: key, value
-              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
-               aggregations: count(value)
-               keys: key (type: string)
+               aggregations: count(DISTINCT value), count(DISTINCT key)
+               keys: key (type: string), value (type: string)
                mode: hash
-               outputColumnNames: _col0, _col1
-               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+               outputColumnNames: _col0, _col1, _col2, _col3
+               Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
-                 key expressions: _col0 (type: string)
-                 sort order: +
+                 key expressions: _col0 (type: string), _col1 (type: string)
+                 sort order: ++
                  Map-reduce partition columns: _col0 (type: string)
-                 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                 value expressions: _col1 (type: bigint)
+                 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
       Reduce Operator Tree:
         Group By Operator
-          aggregations: count(VALUE._col0)
+          aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0)
           keys: KEY._col0 (type: string)
           mode: mergepartial
-          outputColumnNames: _col0, _col1
-          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: string), _col1 (type: bigint)
-            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Fetch Operator
diff --git a/ql/src/test/results/clientpositive/auto_join24.q.out b/ql/src/test/results/clientpositive/auto_join24.q.out
index 1fa3e6e..df89f5c 100644
--- a/ql/src/test/results/clientpositive/auto_join24.q.out
+++ b/ql/src/test/results/clientpositive/auto_join24.q.out
@@ -72,19 +72,15 @@ STAGE PLANS:
                   1 key (type: string)
                 outputColumnNames: _col1
                 Statistics: Num rows: 170 Data size: 817 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col1 (type: int)
-                  outputColumnNames: _col1
-                  Statistics: Num rows: 170 Data size: 817 Basic stats: COMPLETE Column stats: NONE
-                  Group By Operator
-                    aggregations: sum(_col1)
-                    mode: hash
-                    outputColumnNames: _col0
+                Group By Operator
+                  aggregations: sum(_col1)
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
                     Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      sort order: 
-                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col0 (type: bigint)
+                    value expressions: _col0 (type: bigint)
       Local Work:
         Map Reduce Local Work
       Reduce Operator Tree:
diff --git a/ql/src/test/results/clientpositive/auto_join26.q.out b/ql/src/test/results/clientpositive/auto_join26.q.out
index d494d95..a40615f 100644
--- a/ql/src/test/results/clientpositive/auto_join26.q.out
+++ b/ql/src/test/results/clientpositive/auto_join26.q.out
@@ -67,22 +67,18 @@ STAGE PLANS:
                   1 key (type: string)
                 outputColumnNames: _col0
                 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: string)
-                  outputColumnNames: _col0
+                Group By Operator
+                  aggregations: count(1)
+                  keys: _col0 (type: string)
+                  mode: hash
+                  outputColumnNames: _col0, _col1
                   Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-                  Group By Operator
-                    aggregations: count(1)
-                    keys: _col0 (type: string)
-                    mode: hash
-                    outputColumnNames: _col0, _col1
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: string)
                     Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: string)
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: string)
-                      Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col1 (type: bigint)
+                    value expressions: _col1 (type: bigint)
       Local Work:
         Map Reduce Local Work
       Reduce Operator Tree:
diff --git a/ql/src/test/results/clientpositive/auto_join27.q.out b/ql/src/test/results/clientpositive/auto_join27.q.out
index c16992f..db348b7 100644
--- a/ql/src/test/results/clientpositive/auto_join27.q.out
+++ b/ql/src/test/results/clientpositive/auto_join27.q.out
@@ -23,14 +23,14 @@ JOIN
 ON src_12.key = src3.k AND src3.k < 200
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-7 depends on stages: Stage-1
+  Stage-4 is a root stage
+  Stage-7 depends on stages: Stage-4
   Stage-6 depends on stages: Stage-7
   Stage-3 depends on stages: Stage-6
   Stage-0 depends on stages: Stage-3
 
 STAGE PLANS:
-  Stage: Stage-1
+  Stage: Stage-4
     Map Reduce
       Map Operator Tree:
           TableScan
@@ -39,20 +39,16 @@ STAGE PLANS:
             Filter Operator
               predicate: (key < 200) (type: boolean)
               Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: key (type: string), value (type: string)
-                outputColumnNames: key, value
+              Group By Operator
+                keys: key (type: string), value (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1
                 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  keys: key (type: string), value (type: string)
-                  mode: hash
-                  outputColumnNames: _col0, _col1
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: string)
+                  sort order: ++
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
                   Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string), _col1 (type: string)
-                    sort order: ++
-                    Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
-                    Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
       Reduce Operator Tree:
         Group By Operator
           keys: KEY._col0 (type: string), KEY._col1 (type: string)
@@ -100,32 +96,6 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
          TableScan
-            Union
-              Statistics: Num rows: 249 Data size: 2644 Basic stats: COMPLETE Column stats: NONE
-              Map Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                condition expressions:
-                  0 
-                  1 
-                keys:
-                  0 _col0 (type: string)
-                  1 _col0 (type: string)
-                Statistics: Num rows: 273 Data size: 2908 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  Statistics: Num rows: 273 Data size: 2908 Basic stats: COMPLETE Column stats: NONE
-                  Group By Operator
-                    aggregations: count(1)
-                    mode: hash
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      table:
-                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-          TableScan
             alias: src
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
@@ -147,19 +117,41 @@ STAGE PLANS:
                   0 _col0 (type: string)
                   1 _col0 (type: string)
                 Statistics: Num rows: 273 Data size: 2908 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  Statistics: Num rows: 273 Data size: 2908 Basic stats: COMPLETE Column stats: NONE
-                  Group By Operator
-                    aggregations: count(1)
-                    mode: hash
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      table:
-                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                Group By Operator
+                  aggregations: count(1)
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          TableScan
+            Union
+              Statistics: Num rows: 249 Data size: 2644 Basic stats: COMPLETE Column stats: NONE
+              Map Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                condition expressions:
+                  0 
+                  1 
+                keys:
+                  0 _col0 (type: string)
+                  1 _col0 (type: string)
+                Statistics: Num rows: 273 Data size: 2908 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(1)
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
       Local Work:
         Map Reduce Local Work
diff --git a/ql/src/test/results/clientpositive/auto_join30.q.out b/ql/src/test/results/clientpositive/auto_join30.q.out
index 608ca22..0072d7a 100644
--- a/ql/src/test/results/clientpositive/auto_join30.q.out
+++ b/ql/src/test/results/clientpositive/auto_join30.q.out
@@ -90,19 +90,16 @@ STAGE PLANS:
                   0 _col0 (type: string)
                   1 _col0 (type: string)
                 outputColumnNames: _col2, _col3
-                Select Operator
-                  expressions: _col2 (type: string), _col3 (type: string)
-                  outputColumnNames: _col2, _col3
-                  Group By Operator
-                    aggregations: sum(hash(_col2,_col3))
-                    mode: hash
-                    outputColumnNames: _col0
-                    File Output Operator
-                      compressed: false
-                      table:
-                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                Group By Operator
+                  aggregations: sum(hash(_col2,_col3))
+                  mode: hash
+                  outputColumnNames: _col0
+                  File Output Operator
+                    compressed: false
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
       Local Work:
         Map Reduce Local Work
 
@@ -163,19 +160,16 @@ STAGE PLANS:
                   0 _col0 (type: string)
                   1 _col0 (type: string)
                 outputColumnNames: _col2, _col3
-                Select Operator
-                  expressions: _col2 (type: string), _col3 (type: string)
-                  outputColumnNames: _col2, _col3
-                  Group By Operator
-                    aggregations: sum(hash(_col2,_col3))
-                    mode: hash
-                    outputColumnNames: _col0
-                    File Output Operator
-                      compressed: false
-                      table:
-                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                Group By Operator
+                  aggregations: sum(hash(_col2,_col3))
+                  mode: hash
+                  outputColumnNames: _col0
+                  File Output Operator
+                    compressed: false
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
       Local Work:
         Map Reduce Local Work
 
@@ -204,21 +198,17 @@ STAGE PLANS:
             1 {KEY.reducesinkkey0} {VALUE._col0}
           outputColumnNames: _col2, _col3
           Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col2 (type: string), _col3 (type: string)
-            outputColumnNames: _col2, _col3
-            Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-            Group By Operator
-              aggregations: sum(hash(_col2,_col3))
-              mode: hash
-              outputColumnNames: _col0
-              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-              File Output Operator
-                compressed: false
-                table:
-                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          Group By Operator
+            aggregations: sum(hash(_col2,_col3))
+            mode: hash
+            outputColumnNames: _col0
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-4
     Map Reduce
@@ -362,19 +352,16 @@ STAGE PLANS:
                   0 _col0 (type: string)
                   1 _col0 (type: string)
                 outputColumnNames: _col2, _col3
-                Select Operator
-                  expressions: _col2 (type: string), _col3 (type: string)
-                  outputColumnNames: _col2, _col3
-                  Group By Operator
-                    aggregations: sum(hash(_col2,_col3))
-                    mode: hash
-                    outputColumnNames: _col0
-                    File Output Operator
-                      compressed: false
-                      table:
-                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                Group By Operator
+                  aggregations: sum(hash(_col2,_col3))
+                  mode: hash
+                  outputColumnNames: _col0
+                  File Output Operator
+                    compressed: false
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
       Local Work:
         Map Reduce Local Work
 
@@ -429,21 +416,17 @@ STAGE PLANS:
            1 {KEY.reducesinkkey0} {VALUE._col0}
          outputColumnNames: _col2, _col3
          Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col2 (type: string), _col3 (type: string)
-            outputColumnNames: _col2, _col3
-            Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
-            Group By Operator
-              aggregations: sum(hash(_col2,_col3))
-              mode: hash
-              outputColumnNames: _col0
-              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-              File Output Operator
-                compressed: false
-                table:
-                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          Group By Operator
+            aggregations: sum(hash(_col2,_col3))
+            mode: hash
+            outputColumnNames: _col0
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-4
     Map Reduce
@@ -584,19 +567,16 @@ STAGE PLANS:
                   0 _col0 (type: string)
                   1 _col0 (type: string)
                 outputColumnNames: _col2, _col3
-                Select Operator
-                  expressions: _col2 (type: string), _col3 (type: string)
-                  outputColumnNames: _col2, _col3
-                  Group By Operator
-                    aggregations: sum(hash(_col2,_col3))
-                    mode: hash
-                    outputColumnNames: _col0
-                    File Output Operator
-                      compressed: false
-                      table:
-                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                Group By Operator
+                  aggregations: sum(hash(_col2,_col3))
+                  mode: hash
+                  outputColumnNames: _col0
+                  File Output Operator
+                    compressed: false
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
       Local Work:
         Map Reduce Local Work
 
@@ -651,21 +631,17 @@ STAGE PLANS:
            1 {KEY.reducesinkkey0} {VALUE._col0}
          outputColumnNames: _col2, _col3
          Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col2 (type: string), _col3 (type: string)
-            outputColumnNames: _col2, _col3
-            Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
-            Group By Operator
-              aggregations: sum(hash(_col2,_col3))
-              mode: hash
-              outputColumnNames: _col0
-              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-              File Output Operator
-                compressed: false
-                table:
-                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          Group By Operator
+            aggregations: sum(hash(_col2,_col3))
+            mode: hash
+            outputColumnNames: _col0
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-4
     Map Reduce
@@ -767,17 +743,16 @@ STAGE PLANS:
             predicate: key is not null (type: boolean)
             Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
             Select Operator
-              expressions: key (type: string), value (type: string)
-              outputColumnNames: _col0, _col1
+              expressions: key (type: string)
+              outputColumnNames: _col0
               Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
-                key expressions: _col1 (type: string)
+                key expressions: _col0 (type: string)
                 sort order: +
                 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                value expressions: _col0 (type: string)
       Reduce Operator Tree:
         Select Operator
-          expressions: VALUE._col0 (type: string)
+          expressions: KEY.reducesinkkey0 (type: string)
           outputColumnNames: _col0
           Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
@@ -793,14 +768,14 @@ STAGE PLANS:
   Stage: Stage-10
     Map Reduce Local Work
       Alias -> Map Local Tables:
-        $INTNAME
+        $INTNAME1
          Fetch Operator
            limit: -1
        $INTNAME2
          Fetch Operator
            limit: -1
      Alias -> Map Local Operator Tree:
-        $INTNAME
+        $INTNAME1
          TableScan
            HashTable Sink Operator
              condition expressions:
@@ -840,19 +815,16 @@ STAGE PLANS:
                  1 _col0 (type: string)
                  2 _col0 (type: string)
                outputColumnNames: _col2, _col3
-              Select Operator
-                expressions: _col2 (type: string), _col3 (type: string)
-                outputColumnNames: _col2, _col3
-                Group By Operator
-                  aggregations: sum(hash(_col2,_col3))
-                  mode: hash
-                  outputColumnNames: _col0
-                  File Output Operator
-                    compressed: false
-                    table:
-                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              Group By Operator
+                aggregations: sum(hash(_col2,_col3))
+                mode: hash
+                outputColumnNames: _col0
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
       Local Work:
         Map Reduce Local Work
@@ -888,7 +860,7 @@ STAGE PLANS:
        $INTNAME
          Fetch Operator
            limit: -1
-        $INTNAME1
+        $INTNAME2
          Fetch Operator
            limit: -1
      Alias -> Map Local Operator Tree:
@@ -903,7 +875,7 @@ STAGE PLANS:
                0 _col0 (type: string)
                1 _col0 (type: string)
                2 _col0 (type: string)
-        $INTNAME1
+        $INTNAME2
          TableScan
            HashTable Sink Operator
              condition expressions:
@@ -932,33 +904,30 @@ STAGE PLANS:
                  1 _col0 (type: string)
                  2 _col0 (type: string)
                outputColumnNames: _col2, _col3
-              Select Operator
-                expressions: _col2 (type: string), _col3 (type: string)
-                outputColumnNames: _col2, _col3
-                Group By Operator
-                  aggregations: sum(hash(_col2,_col3))
-                  mode: hash
-                  outputColumnNames: _col0
-                  File Output Operator
-                    compressed: false
-                    table:
-                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              Group By Operator
+                aggregations: sum(hash(_col2,_col3))
+                mode: hash
+                outputColumnNames: _col0
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
       Local Work:
         Map Reduce Local Work
 
  Stage: Stage-12
    Map Reduce Local Work
      Alias -> Map Local Tables:
-        $INTNAME1
+        $INTNAME
          Fetch Operator
            limit: -1
-        $INTNAME2
+        $INTNAME1
          Fetch Operator
            limit: -1
      Alias -> Map Local Operator Tree:
-        $INTNAME1
+        $INTNAME
          TableScan
            HashTable Sink Operator
              condition expressions:
@@ -969,7 +938,7 @@ STAGE PLANS:
                0 _col0 (type: string)
                1 _col0 (type: string)
                2 _col0 (type: string)
-        $INTNAME2
+        $INTNAME1
          TableScan
            HashTable Sink Operator
              condition expressions:
@@ -998,19 +967,16 @@ STAGE PLANS:
                  1 _col0 (type: string)
                  2 _col0 (type: string)
                outputColumnNames: _col2, _col3
-              Select Operator
-                expressions: _col2 (type: string), _col3 (type: string)
-                outputColumnNames: _col2, _col3
-                Group By Operator
-                  aggregations: sum(hash(_col2,_col3))
-                  mode: hash
-                  outputColumnNames: _col0
-                  File Output Operator
-                    compressed: false
-                    table:
-                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              Group By Operator
+                aggregations: sum(hash(_col2,_col3))
+                mode: hash
+                outputColumnNames: _col0
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
       Local Work:
         Map Reduce Local Work
@@ -1029,13 +995,13 @@ STAGE PLANS:
          sort order: +
          Map-reduce partition columns: _col0 (type: string)
          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+          value expressions: _col1 (type: string)
        TableScan
          Reduce Output Operator
            key expressions: _col0 (type: string)
            sort order: +
            Map-reduce partition columns: _col0 (type: string)
            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-            value expressions: _col1 (type: string)
      Reduce Operator Tree:
        Join Operator
          condition map:
@@ -1047,21 +1013,17 @@ STAGE PLANS:
            2 
          outputColumnNames: _col2, _col3
          Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col2 (type: string), _col3 (type: string)
-            outputColumnNames: _col2, _col3
-            Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
-            Group By Operator
-              aggregations: sum(hash(_col2,_col3))
-              mode: hash
-              outputColumnNames: _col0
-              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-              File Output Operator
-                compressed: false
-                table:
-                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          Group By Operator
+            aggregations: sum(hash(_col2,_col3))
+            mode: hash
+            outputColumnNames: _col0
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
  Stage: Stage-4
    Map Reduce
@@ -1073,17 +1035,18 @@ STAGE PLANS:
            predicate: key is not null (type: boolean)
            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
            Select Operator
-              expressions: key (type: string)
-              outputColumnNames: _col0
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: _col0, _col1
              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
              Reduce Output Operator
-                key expressions: _col0 (type: string)
+                key expressions: _col1 (type: string)
                sort order: +
                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col0 (type: string)
      Reduce Operator Tree:
        Select Operator
-          expressions: KEY.reducesinkkey0 (type: string)
-          outputColumnNames: _col0
+          expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
+          outputColumnNames: _col0, _col1
          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
@@ -1112,8 +1075,8 @@ STAGE PLANS:
          value expressions: _col0 (type: string)
      Reduce Operator Tree:
        Select Operator
-          expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
-          outputColumnNames: _col0, _col1
+          expressions: VALUE._col0 (type: string)
+          outputColumnNames: _col0
          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
@@ -1196,17 +1159,16 @@ STAGE PLANS:
          alias: src
          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
          Select Operator
-            expressions: key (type: string), value (type: string)
-            outputColumnNames: _col0, _col1
+            expressions: key (type: string)
+            outputColumnNames: _col0
            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
            Reduce Output Operator
-              key expressions: _col1 (type: string)
+              key expressions: _col0 (type: string)
              sort order: +
              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col0 (type: string)
      Reduce Operator Tree:
        Select Operator
-          expressions: VALUE._col0 (type: string)
+          expressions: KEY.reducesinkkey0 (type: string)
          outputColumnNames: _col0
          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
@@ -1222,14 +1184,14 @@ STAGE PLANS:
  Stage: Stage-9
    Map Reduce Local Work
      Alias -> Map Local Tables:
-        $INTNAME
+        $INTNAME1
          Fetch Operator
            limit: -1
        $INTNAME2
          Fetch Operator
            limit: -1
      Alias -> Map Local Operator Tree:
-        $INTNAME
+        $INTNAME1
          TableScan
            HashTable Sink Operator
              condition expressions:
@@ -1269,19 +1231,16 @@ STAGE PLANS:
                  1 _col0 (type: string)
                  2 _col0 (type: string)
                outputColumnNames: _col2, _col3
-              Select Operator
-                expressions: _col2 (type: string), _col3 (type: string)
-                outputColumnNames: _col2, _col3
-                Group By Operator
-                  aggregations: sum(hash(_col2,_col3))
-                  mode: hash
-                  outputColumnNames: _col0
-                  File Output Operator
-                    compressed: false
-                    table:
-                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              Group By Operator
+                aggregations: sum(hash(_col2,_col3))
+                mode: hash
+                outputColumnNames: _col0
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
       Local Work:
         Map Reduce Local Work
@@ -1317,7 +1276,7 @@ STAGE PLANS:
        $INTNAME
          Fetch Operator
            limit: -1
-        $INTNAME1
+        $INTNAME2
          Fetch Operator
            limit: -1
      Alias -> Map Local Operator Tree:
@@ -1332,7 +1291,7 @@ STAGE PLANS:
                0 _col0 (type: string)
                1 _col0 (type: string)
                2 _col0 (type: string)
-        $INTNAME1
+        $INTNAME2
          TableScan
            HashTable Sink Operator
              condition expressions:
@@ -1361,19 +1320,16 @@ STAGE PLANS:
                  1 _col0 (type: string)
                  2 _col0 (type: string)
                outputColumnNames: _col2, _col3
-              Select Operator
-                expressions: _col2 (type: string), _col3 (type: string)
-                outputColumnNames: _col2, _col3
-                Group By Operator
-                  aggregations: sum(hash(_col2,_col3))
-                  mode: hash
-                  outputColumnNames: _col0
-                  File Output Operator
-                    compressed: false
-                    table:
-                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              Group By Operator
+                aggregations: sum(hash(_col2,_col3))
+                mode: hash
+                outputColumnNames: _col0
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
       Local Work:
         Map Reduce Local Work
@@ -1392,13 +1348,13 @@ STAGE PLANS:
          sort order: +
          Map-reduce partition columns: _col0 (type: string)
          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+          value expressions: _col1 (type: string)
        TableScan
          Reduce Output Operator
            key expressions: _col0 (type: string)
            sort order: +
            Map-reduce partition columns: _col0 (type: string)
            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-            value expressions: _col1 (type: string)
      Reduce Operator Tree:
        Join Operator
          condition map:
@@ -1410,21 +1366,17 @@ STAGE PLANS:
            2 
          outputColumnNames: _col2, _col3
          Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col2 (type: string), _col3 (type: string)
-            outputColumnNames: _col2, _col3
-            Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
-            Group By Operator
-              aggregations: sum(hash(_col2,_col3))
-              mode: hash
-              outputColumnNames: _col0
-              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-              File Output Operator
-                compressed: false
-                table:
-                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          Group By Operator
+            aggregations: sum(hash(_col2,_col3))
+            mode: hash
+            outputColumnNames: _col0
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
  Stage: Stage-4
    Map Reduce
@@ -1433,17 +1385,18 @@ STAGE PLANS:
          alias: src
          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
          Select Operator
-            expressions: key (type: string)
-            outputColumnNames: _col0
+            expressions: key (type: string), value (type: string)
+            outputColumnNames: _col0, _col1
            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
            Reduce Output Operator
-              key expressions: _col0 (type: string)
+              key expressions: _col1 (type: string)
              sort order: +
              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: string)
      Reduce Operator Tree:
        Select Operator
-          expressions: KEY.reducesinkkey0 (type: string)
-          outputColumnNames: _col0
+          expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
+          outputColumnNames: _col0, _col1
          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
@@ -1469,8 +1422,8 @@ STAGE PLANS:
          value expressions: _col0 (type: string)
      Reduce Operator Tree:
        Select Operator
-          expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
-          outputColumnNames: _col0, _col1
+          expressions: VALUE._col0 (type: string)
+          outputColumnNames: _col0
          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
@@ -1551,17 +1504,16 @@ STAGE PLANS:
          alias: src
          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
          Select Operator
-            expressions: key (type: string), value (type: string)
-            outputColumnNames: _col0, _col1
+            expressions: key (type: string)
+            outputColumnNames: _col0
            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
            Reduce Output Operator
-              key expressions: _col1 (type: string)
+              key expressions: _col0 (type: string)
              sort order: +
              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col0 (type: string)
      Reduce Operator Tree:
        Select Operator
-          expressions: VALUE._col0 (type: string)
+          expressions: KEY.reducesinkkey0 (type: string)
          outputColumnNames: _col0
          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
@@ -1577,14 +1529,14 @@ STAGE PLANS:
  Stage: Stage-8
    Map Reduce Local Work
      Alias -> Map Local Tables:
-        $INTNAME
+        $INTNAME1
          Fetch Operator
            limit: -1
        $INTNAME2
          Fetch Operator
            limit: -1
      Alias -> Map Local Operator Tree:
-        $INTNAME
+        $INTNAME1
          TableScan
            HashTable Sink Operator
              condition expressions:
@@ -1624,19 +1576,16 @@ STAGE PLANS:
                  1 _col0 (type: string)
                  2 _col0 (type: string)
                outputColumnNames: _col2, _col3
-              Select Operator
-                expressions: _col2 (type: string), _col3 (type: string)
-                outputColumnNames: _col2, _col3
-                Group By Operator
-                  aggregations: sum(hash(_col2,_col3))
-                  mode: hash
-                  outputColumnNames: _col0
-                  File Output Operator
-                    compressed: false
-                    table:
-                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              Group By Operator
+                aggregations: sum(hash(_col2,_col3))
+                mode: hash
+                outputColumnNames: _col0
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
       Local Work:
         Map Reduce Local Work
@@ -1681,13 +1630,13 @@ STAGE PLANS:
          sort order: +
          Map-reduce partition columns: _col0 (type: string)
          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+          value expressions: _col1 (type: string)
        TableScan
          Reduce Output Operator
            key expressions: _col0 (type: string)
            sort order: +
            Map-reduce partition columns: _col0 (type: string)
            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-            value expressions: _col1 (type: string)
      Reduce Operator Tree:
        Join Operator
          condition map:
@@ -1699,21 +1648,17 @@ STAGE PLANS:
            2 
          outputColumnNames: _col2, _col3
          Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col2 (type: string), _col3 (type: string)
-            outputColumnNames: _col2, _col3
-            Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
-            Group By Operator
-              aggregations: sum(hash(_col2,_col3))
-              mode: hash
-              outputColumnNames: _col0
-              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-              File Output Operator
-                compressed: false
-                table:
-                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          Group By Operator
+            aggregations: sum(hash(_col2,_col3))
+            mode: hash
+            outputColumnNames: _col0
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
  Stage: Stage-4
    Map Reduce
@@ -1722,17 +1667,18 @@ STAGE PLANS:
          alias: src
          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
          Select Operator
-            expressions: key (type: string)
-            outputColumnNames: _col0
+            expressions: key (type: string), value (type: string)
+            outputColumnNames: _col0, _col1
            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
            Reduce Output Operator
-              key expressions: _col0 (type: string)
+              key expressions: _col1 (type: string)
              sort order: +
              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: string)
      Reduce Operator Tree:
        Select Operator
-          expressions: KEY.reducesinkkey0 (type: string)
-          outputColumnNames: _col0
+          expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
+          outputColumnNames: _col0, _col1
          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
@@ -1758,8 +1704,8 @@ STAGE PLANS:
          value expressions: _col0 (type: string)
      Reduce Operator Tree:
        Select Operator
-          expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
-          outputColumnNames: _col0, _col1
+          expressions: VALUE._col0 (type: string)
+          outputColumnNames: _col0
          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
@@ -1840,17 +1786,16 @@ STAGE PLANS:
          alias: src
          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
          Select Operator
-            expressions: key (type: string), value (type: string)
-            outputColumnNames: _col0, _col1
+            expressions: key (type: string)
+            outputColumnNames: _col0
            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
            Reduce Output Operator
-              key expressions: _col1 (type: string)
+              key expressions: _col0 (type: string)
              sort order: +
              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col0 (type: string)
      Reduce Operator Tree:
        Select Operator
-          expressions: VALUE._col0 (type: string)
+          expressions: KEY.reducesinkkey0 (type: string)
          outputColumnNames: _col0
          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
@@ -1866,14 +1811,14 @@ STAGE PLANS:
  Stage: Stage-8
    Map Reduce Local Work
      Alias -> Map Local Tables:
-        $INTNAME1
+        $INTNAME
          Fetch Operator
            limit: -1
-        $INTNAME2
+        $INTNAME1
          Fetch Operator
            limit: -1
      Alias -> Map Local Operator Tree:
-        $INTNAME1
+        $INTNAME
          TableScan
            HashTable Sink Operator
              condition expressions:
@@ -1884,7 +1829,7 @@ STAGE PLANS:
                0 _col0 (type: string)
                1 _col0 (type: string)
                2 _col0 (type: string)
-        $INTNAME2
+        $INTNAME1
          TableScan
            HashTable Sink Operator
              condition expressions:
@@ -1913,19 +1858,16 @@ STAGE PLANS:
                  1 _col0 (type: string)
                  2 _col0 (type: string)
                outputColumnNames: _col2, _col3
-              Select Operator
-                expressions: _col2 (type: string), _col3 (type: string)
-                outputColumnNames: _col2, _col3
-                Group By Operator
-                  aggregations: sum(hash(_col2,_col3))
-                  mode: hash
-                  outputColumnNames: _col0
-                  File Output Operator
-                    compressed: false
-                    table:
-                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              Group By Operator
+                aggregations: sum(hash(_col2,_col3))
+                mode: hash
+                outputColumnNames: _col0
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
       Local Work:
         Map Reduce Local Work
@@ -1970,13 +1912,13 @@ STAGE PLANS:
          sort order: +
          Map-reduce partition columns: _col0 (type: string)
          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+          value expressions: _col1 (type: string)
        TableScan
          Reduce Output Operator
            key expressions: _col0 (type: string)
            sort order: +
            Map-reduce partition columns: _col0 (type: string)
            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-            value expressions: _col1 (type: string)
      Reduce Operator Tree:
        Join Operator
          condition map:
@@ -1988,21 +1930,17 @@ STAGE PLANS:
            2 
          outputColumnNames: _col2, _col3
          Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col2 (type: string), _col3 (type: string)
-            outputColumnNames: _col2, _col3
-            Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
-            Group By Operator
-              aggregations: sum(hash(_col2,_col3))
-              mode: hash
-              outputColumnNames: _col0
-              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-              File Output Operator
-                compressed: false
-                table:
-                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          Group By Operator
+            aggregations: sum(hash(_col2,_col3))
+            mode: hash
+            outputColumnNames: _col0
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
  Stage: Stage-4
    Map Reduce
@@ -2011,17 +1949,18 @@ STAGE PLANS:
          alias: src
          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
          Select Operator
-            expressions: key (type: string)
-            outputColumnNames: _col0
+            expressions: key (type: string), value (type: string)
+            outputColumnNames: _col0, _col1
            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
            Reduce Output Operator
-              key expressions: _col0 (type: string)
+              key expressions: _col1 (type: string)
              sort order: +
              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: string)
      Reduce Operator Tree:
        Select Operator
-          expressions: KEY.reducesinkkey0 (type: string)
-          outputColumnNames: _col0
+          expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
+          outputColumnNames: _col0, _col1
          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
@@ -2047,8 +1986,8 @@ STAGE PLANS:
          value expressions: _col0 (type: string)
      Reduce Operator Tree:
        Select Operator
-          expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
-          outputColumnNames: _col0, _col1
+          expressions: VALUE._col0 (type: string)
+          outputColumnNames: _col0
          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
@@ -2129,17 +2068,16 @@ STAGE PLANS:
          alias: src
          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
          Select Operator
-            expressions: key (type: string), value (type: string)
-            outputColumnNames: _col0, _col1
+            expressions: key (type: string)
+            outputColumnNames: _col0
            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
            Reduce Output Operator
-              key expressions: _col1 (type: string)
+              key expressions: _col0 (type: string)
              sort order: +
              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col0 (type: string)
      Reduce Operator Tree:
        Select Operator
-          expressions: VALUE._col0 (type: string)
+          expressions: KEY.reducesinkkey0 (type: string)
          outputColumnNames: _col0
          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
@@ -2155,14 +2093,14 @@ STAGE PLANS:
  Stage: Stage-8
    Map Reduce Local Work
      Alias -> Map Local Tables:
-        $INTNAME1
+        $INTNAME
          Fetch Operator
            limit: -1
-        $INTNAME2
+        $INTNAME1
          Fetch Operator
            limit: -1
      Alias -> Map Local Operator Tree:
-        $INTNAME1
+        $INTNAME
          TableScan
            HashTable Sink Operator
              condition expressions:
@@ -2173,7 +2111,7 @@ STAGE PLANS:
                0 _col0 (type: string)
                1 _col0 (type: string)
                2 _col0 (type: string)
-        $INTNAME2
+        $INTNAME1
          TableScan
            HashTable Sink Operator
              condition expressions:
@@ -2202,19 +2140,16 @@ STAGE PLANS:
                  1 _col0 (type: string)
                  2 _col0 (type: string)
                outputColumnNames: _col2, _col3
-              Select Operator
-                expressions: _col2 (type: string), _col3 (type: string)
-                outputColumnNames: _col2, _col3
-                Group By Operator
-                  aggregations: sum(hash(_col2,_col3))
-                  mode: hash
-                  outputColumnNames: _col0
-                  File Output Operator
-                    compressed: false
-                    table:
-                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              Group By Operator
+                aggregations: sum(hash(_col2,_col3))
+                mode: hash
+                outputColumnNames: _col0
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
       Local Work:
         Map Reduce Local Work
@@ -2259,13 +2194,13 @@ STAGE PLANS:
          sort order: +
          Map-reduce partition columns: _col0 (type: string)
          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+          value expressions: _col1 (type: string)
        TableScan
          Reduce Output Operator
            key expressions: _col0 (type: string)
            sort order: +
            Map-reduce partition columns: _col0 (type: string)
            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-            value expressions: _col1 (type: string)
      Reduce Operator Tree:
        Join Operator
          condition map:
@@ -2277,21 +2212,17 @@ STAGE PLANS:
            2 
          outputColumnNames: _col2, _col3
          Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col2 (type: string), _col3 (type: string)
-            outputColumnNames: _col2, _col3
-            Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
-            Group By Operator
-              aggregations: sum(hash(_col2,_col3))
-              mode: hash
-              outputColumnNames: _col0
-              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-              File Output Operator
-                compressed: false
-                table:
-                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          Group By Operator
+            aggregations: sum(hash(_col2,_col3))
+            mode: hash
+            outputColumnNames: _col0
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
  Stage: Stage-4
    Map Reduce
@@ -2300,17 +2231,18 @@ STAGE PLANS:
          alias: src
          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
          Select Operator
-            expressions: key (type: string)
-            outputColumnNames: _col0
+            expressions: key (type: string), value (type: string)
+            outputColumnNames: _col0, _col1
            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
            Reduce Output Operator
-              key expressions: _col0 (type: string)
+              key expressions: _col1 (type: string)
              sort order: +
              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: string)
      Reduce Operator Tree:
        Select Operator
-          expressions: KEY.reducesinkkey0 (type: string)
-          outputColumnNames: _col0
+          expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
+          outputColumnNames: _col0, _col1
          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
@@ -2336,8 +2268,8 @@ STAGE PLANS:
          value expressions: _col0 (type: string)
      Reduce Operator Tree:
        Select Operator
-          expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
-          outputColumnNames: _col0, _col1
+          expressions: VALUE._col0 (type: string)
+          outputColumnNames: _col0
          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
diff --git a/ql/src/test/results/clientpositive/auto_join31.q.out b/ql/src/test/results/clientpositive/auto_join31.q.out
index b0df202..44122eb 100644
--- a/ql/src/test/results/clientpositive/auto_join31.q.out
+++ b/ql/src/test/results/clientpositive/auto_join31.q.out
@@ -41,17 +41,16 @@ STAGE PLANS:
          alias: src
          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
          Select Operator
-            expressions: key (type: string), value (type: string)
-            outputColumnNames: _col0, _col1
+            expressions: key (type: string)
+            outputColumnNames: _col0
            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
            Reduce Output Operator
-              key expressions: _col1 (type: string)
+              key expressions: _col0 (type: string)
              sort order: +
              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col0 (type: string)
      Reduce Operator Tree:
        Select Operator
-          expressions: VALUE._col0 (type: string)
+          expressions: KEY.reducesinkkey0 (type: string)
          outputColumnNames: _col0
          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
@@ -70,7 +69,7 @@ STAGE PLANS:
        $INTNAME
          Fetch Operator
            limit: -1
-        $INTNAME1
+        $INTNAME2
          Fetch Operator
            limit: -1
      Alias -> Map Local Operator Tree:
@@ -85,7 +84,7 @@ STAGE PLANS:
                0 _col0 (type: string)
                1 _col0 (type: string)
                2 _col0 (type: string)
-        $INTNAME1
+        $INTNAME2
          TableScan
            HashTable Sink Operator
              condition expressions:
@@ -114,19 +113,16 @@ STAGE PLANS:
                  1 _col0 (type: string)
                  2 _col0 (type: string)
                outputColumnNames: _col2, _col3
-              Select Operator
-                expressions: _col2 (type: string), _col3 (type: string)
-                outputColumnNames: _col2, _col3
-                Group By Operator
-                  aggregations: sum(hash(_col2,_col3))
-                  mode: hash
-                  outputColumnNames: _col0
-                  File Output Operator
-                    compressed: false
-                    table:
-                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              Group By Operator
+                aggregations: sum(hash(_col2,_col3))
+                mode: hash
+                outputColumnNames: _col0
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
       Local Work:
         Map Reduce Local Work
@@ -159,14 +155,14 @@ STAGE PLANS:
  Stage: Stage-10
    Map Reduce Local Work
      Alias -> Map Local Tables:
-        $INTNAME1
+        $INTNAME
          Fetch Operator
            limit: -1
-        $INTNAME2
+        $INTNAME1
          Fetch Operator
            limit: -1
      Alias -> Map Local Operator Tree:
-        $INTNAME1
+        $INTNAME
          TableScan
            HashTable Sink Operator
              condition expressions:
@@ -177,7 +173,7 @@ STAGE PLANS:
                0 _col0 (type: string)
                1 _col0 (type: string)
                2 _col0 (type: string)
-        $INTNAME2
+        $INTNAME1
          TableScan
            HashTable Sink Operator
              condition expressions:
@@ -206,19 +202,16 @@ STAGE PLANS:
                  1 _col0 (type: string)
                  2 _col0 (type: string)
                outputColumnNames: _col2, _col3
-              Select Operator
-                expressions: _col2 (type: string), _col3 (type: string)
-                outputColumnNames: _col2, _col3
-                Group By Operator
-                  aggregations: sum(hash(_col2,_col3))
-                  mode: hash
-                  outputColumnNames: _col0
-                  File Output Operator
-                    compressed: false
-                    table:
-                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              Group By Operator
+                aggregations: sum(hash(_col2,_col3))
+                mode: hash
+                outputColumnNames: _col0
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
       Local Work:
         Map Reduce Local Work
@@ -237,13 +230,13 @@ STAGE PLANS:
          sort order: +
          Map-reduce partition columns: _col0 (type: string)
          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+          value expressions: _col1 (type: string)
        TableScan
          Reduce Output Operator
            key expressions: _col0 (type: string)
            sort order: +
            Map-reduce partition columns: _col0 (type: string)
            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-            value expressions: _col1 (type: string)
      Reduce Operator Tree:
        Join Operator
          condition map:
@@ -255,21 +248,17 @@ STAGE PLANS:
            2 
          outputColumnNames: _col2, _col3
          Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col2 (type: string), _col3 (type: string)
-            outputColumnNames: _col2, _col3
-            Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
-            Group By Operator
-              aggregations: sum(hash(_col2,_col3))
-              mode: hash
-              outputColumnNames: _col0
-              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-              File Output Operator
-                compressed: false
-                table:
-                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          Group By Operator
+            aggregations: sum(hash(_col2,_col3))
+            mode: hash
+            outputColumnNames: _col0
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
  Stage: Stage-4
    Map Reduce
@@ -278,17 +267,18 @@ STAGE PLANS:
          alias: src
          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
          Select Operator
-            expressions: key (type: string)
-            outputColumnNames: _col0
+            expressions: key (type: string), value (type: string)
+            outputColumnNames: _col0, _col1
            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
            Reduce Output Operator
-              key expressions: _col0 (type: string)
+              key expressions: _col1 (type: string)
              sort order: +
              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: string)
      Reduce Operator Tree:
        Select Operator
-          expressions: KEY.reducesinkkey0 (type: string)
-          outputColumnNames: _col0
+          expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
+          outputColumnNames: _col0, _col1
          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
@@ -314,8 +304,8 @@ STAGE PLANS:
          value expressions: _col0 (type: string)
      Reduce Operator Tree:
        Select Operator
-          expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
-          outputColumnNames: _col0, _col1
+          expressions: VALUE._col0 (type: string)
+          outputColumnNames: _col0
          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
diff --git a/ql/src/test/results/clientpositive/auto_join32.q.out b/ql/src/test/results/clientpositive/auto_join32.q.out
index bc2d56c..6bf5e7d 100644
--- a/ql/src/test/results/clientpositive/auto_join32.q.out
+++ b/ql/src/test/results/clientpositive/auto_join32.q.out
@@ -74,21 +74,17 @@ STAGE PLANS:
                  1 name (type: string)
                outputColumnNames: _col0, _col8
                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-              Select Operator
-                expressions: _col0 (type: string), _col8 (type: string)
-                outputColumnNames: _col0, _col8
+              Group By Operator
+                aggregations: count(DISTINCT _col8)
+                keys: _col0 (type: string), _col8 (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                Group By Operator
-                  aggregations: count(DISTINCT _col8)
-                  keys: _col0 (type: string), _col8 (type: string)
-                  mode: hash
-                  outputColumnNames: _col0, _col1, _col2
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: string)
+                  sort order: ++
+                  Map-reduce partition columns: _col0 (type: string)
                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string), _col1 (type: string)
-                    sort order: ++
-                    Map-reduce partition columns: _col0 (type: string)
-                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
      Local Work:
        Map Reduce Local Work
      Reduce Operator Tree:
@@ -184,18 +180,15 @@ STAGE PLANS:
                  0 name (type: string)
                  1 name (type: string)
                outputColumnNames: _col0, _col8
-              Select Operator
-                expressions: _col0 (type: string), _col8 (type: string)
-                outputColumnNames: _col0, _col8
-                Group By Operator
-                  aggregations: count(DISTINCT _col8)
-                  keys: _col0 (type: string), _col8 (type: string)
-                  mode: hash
-                  outputColumnNames: _col0, _col1, _col2
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string), _col1 (type: string)
-                    sort order: ++
-                    Map-reduce partition columns: _col0 (type: string)
+              Group By Operator
+                aggregations: count(DISTINCT _col8)
+                keys: _col0 (type: string), _col8 (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: string)
+                  sort order: ++
+                  Map-reduce partition columns: _col0 (type: string)
      Reduce Operator Tree:
        Group By Operator
          aggregations: count(DISTINCT KEY._col1:0._col0)
@@ -300,18 +293,15 @@ STAGE PLANS:
                  0 name (type: string)
                  1 name (type: string)
                outputColumnNames: _col0, _col8
-              Select Operator
-                expressions: _col0 (type: string), _col8 (type: string)
-                outputColumnNames: _col0, _col8
-                Group By Operator
-                  aggregations: count(DISTINCT _col8)
-                  keys: _col0 (type: string), _col8 (type: string)
-                  mode: hash
-                  outputColumnNames: _col0, _col1, _col2
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string), _col1 (type: string)
-                    sort order: ++
-                    Map-reduce partition columns: _col0 (type: string)
+              Group By Operator
+                aggregations: count(DISTINCT _col8)
+                keys: _col0 (type: string), _col8 (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: string)
+                  sort order: ++
+                  Map-reduce partition columns: _col0 (type: string)
      Reduce Operator Tree:
        Group By Operator
          aggregations: count(DISTINCT KEY._col1:0._col0)
diff --git a/ql/src/test/results/clientpositive/auto_join_reordering_values.q.out b/ql/src/test/results/clientpositive/auto_join_reordering_values.q.out
b/ql/src/test/results/clientpositive/auto_join_reordering_values.q.out index aed67b6..d9ab9af 100644 --- a/ql/src/test/results/clientpositive/auto_join_reordering_values.q.out +++ b/ql/src/test/results/clientpositive/auto_join_reordering_values.q.out @@ -179,35 +179,35 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: dim_pay_date + alias: orderpayment Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: date is not null (type: boolean) + predicate: (((date is not null and dealid is not null) and cityid is not null) and userid is not null) (type: boolean) Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: date (type: string) sort order: + Map-reduce partition columns: date (type: string) Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE - tag: 1 + tag: 0 + value expressions: dealid (type: int), cityid (type: int), userid (type: int) auto parallelism: false TableScan - alias: orderpayment + alias: dim_pay_date Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (((date is not null and dealid is not null) and cityid is not null) and userid is not null) (type: boolean) + predicate: date is not null (type: boolean) Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: date (type: string) sort order: + Map-reduce partition columns: date (type: string) Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: dealid (type: int), cityid (type: int), userid (type: int) + tag: 1 auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -257,7 +257,7 @@ STAGE PLANS: name: default.orderpayment_small name: default.orderpayment_small Truncated Path -> Alias: - /orderpayment_small [dim_pay_date, orderpayment] + /orderpayment_small [orderpayment, dim_pay_date] Needs Tagging: true Reduce Operator Tree: Join Operator diff --git a/ql/src/test/results/clientpositive/auto_join_without_localtask.q.out b/ql/src/test/results/clientpositive/auto_join_without_localtask.q.out index c3a539d..cf065fb 100644 --- a/ql/src/test/results/clientpositive/auto_join_without_localtask.q.out +++ b/ql/src/test/results/clientpositive/auto_join_without_localtask.q.out @@ -138,7 +138,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -148,8 +148,9 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -159,7 +160,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -495,28 +495,28 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic 
stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -848,28 +848,28 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key > 100) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + predicate: ((key is not null and value is not null) and (key > 100)) (type: boolean) + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key is not null and value is not null) and (key > 100)) (type: boolean) - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + predicate: (key > 100) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: diff --git a/ql/src/test/results/clientpositive/auto_smb_mapjoin_14.q.out b/ql/src/test/results/clientpositive/auto_smb_mapjoin_14.q.out index 2583d9a..7e09f3b 100644 --- a/ql/src/test/results/clientpositive/auto_smb_mapjoin_14.q.out +++ b/ql/src/test/results/clientpositive/auto_smb_mapjoin_14.q.out @@ -77,14 +77,13 @@ STAGE PLANS: keys: 0 key (type: int) 1 key (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash 
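
The hunks across these .q.out files all follow one pattern: a Select Operator that merely forwarded its input columns unchanged (an identity projection) used to sit between the join or filter and the aggregation, and the regenerated plans now feed the Group By Operator directly. A minimal sketch of a query whose plan contains this shape, assuming the bucketed-and-sorted tbl1/tbl2 tables and the SMB settings these tests use (names taken from the .q.out files above, not verified here):

    -- Assumed setup mirroring the auto_smb_mapjoin tests:
    set hive.auto.convert.sortmerge.join = true;
    set hive.optimize.bucketmapjoin = true;
    set hive.optimize.bucketmapjoin.sortedmerge = true;

    -- Before the change, the plan for this query carried an identity
    -- Select Operator between the SMB map join and the count() Group By;
    -- after it, count() consumes the join output directly.
    explain
    select count(*)
    from tbl1 a
    join tbl2 b on a.key = b.key;
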
- outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -145,8 +144,8 @@ select count(*) from POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-3 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-3 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -168,19 +167,16 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Group By Operator - aggregations: count() - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: bigint) + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -199,7 +195,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -292,9 +288,9 @@ on src1.key = src2.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-3 depends on stages: Stage-1, Stage-4 + Stage-2 depends on stages: Stage-1, Stage-4 Stage-4 is a root stage - Stage-0 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -316,36 +312,30 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Group By Operator - aggregations: count() - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: bigint) + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: int), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: 
Stage-3 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -397,34 +387,28 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Group By Operator - aggregations: count() - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: bigint) + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: int), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator @@ -521,14 +505,13 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -624,14 +607,13 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 key (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -751,14 +733,13 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -868,14 +849,13 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) 
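
Independent of the projection change, every aggregation in these plans still runs in two phases: a map-side Group By Operator in mode: hash emits partial counts through a Reduce Output Operator, and the reducer's Group By Operator in mode: mergepartial combines them into the final value. A sketch of the smallest query exercising both phases, assuming the stock src(key string, value string) test table:

    -- hive.map.aggr enables the map-side hash aggregation visible in
    -- the plans above; src is the standard Hive test table (an assumption here).
    set hive.map.aggr = true;

    explain
    select key, count(*)
    from src
    group by key;

In the printed plan, the map side shows mode: hash and the reduce side mode: mergepartial, matching the operator pairs throughout this diff.
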
Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -981,19 +961,17 @@ STAGE PLANS: 0 1 Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -1087,14 +1065,13 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 key (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1185,14 +1162,13 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) 2 _col0 (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1296,14 +1272,13 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 key (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1440,16 +1415,13 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 Stage: 
Stage-8 Conditional Operator @@ -1706,32 +1678,26 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: int), _col1 (type: string) + expressions: _col0 (type: int) + outputColumnNames: _col0 + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Group By Operator - aggregations: count() - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: bigint) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/auto_sortmerge_join_1.q.out b/ql/src/test/results/clientpositive/auto_sortmerge_join_1.q.out index 5a7f174..9620c64 100644 --- a/ql/src/test/results/clientpositive/auto_sortmerge_join_1.q.out +++ b/ql/src/test/results/clientpositive/auto_sortmerge_join_1.q.out @@ -167,16 +167,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 1 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -394,16 +393,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 0 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -697,16 +695,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 0 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + 
sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -1025,16 +1022,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 1 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -1236,16 +1232,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 0 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: diff --git a/ql/src/test/results/clientpositive/auto_sortmerge_join_10.q.out b/ql/src/test/results/clientpositive/auto_sortmerge_join_10.q.out index 7606dd2..be9d9db 100644 --- a/ql/src/test/results/clientpositive/auto_sortmerge_join_10.q.out +++ b/ql/src/test/results/clientpositive/auto_sortmerge_join_10.q.out @@ -126,19 +126,17 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) Statistics: Num rows: 6 Data size: 46 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 6 Data size: 46 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe TableScan alias: a Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE @@ -165,19 +163,17 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) Statistics: Num rows: 6 Data size: 46 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 6 Data size: 46 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Group By Operator + aggregations: count() + mode: hash + 
outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -274,23 +270,19 @@ STAGE PLANS: Filter Operator predicate: (key < 6) (type: boolean) Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: key + Group By Operator + aggregations: count() + bucketGroup: true + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - bucketGroup: true - keys: key (type: int) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -349,17 +341,15 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/auto_sortmerge_join_11.q.out b/ql/src/test/results/clientpositive/auto_sortmerge_join_11.q.out index 8372a63..9a15daa 100644 --- a/ql/src/test/results/clientpositive/auto_sortmerge_join_11.q.out +++ b/ql/src/test/results/clientpositive/auto_sortmerge_join_11.q.out @@ -237,19 +237,17 @@ STAGE PLANS: 1 key (type: string) Position of Big Table: 1 Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - 
auto parallelism: false + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -599,19 +597,17 @@ STAGE PLANS: 1 key (type: string) Position of Big Table: 1 Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -967,19 +963,17 @@ STAGE PLANS: Position of Big Table: 1 Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true - Select Operator - Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -1422,19 +1416,17 @@ STAGE PLANS: Position of Big Table: 2 Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true - Select Operator - Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: diff --git a/ql/src/test/results/clientpositive/auto_sortmerge_join_12.q.out b/ql/src/test/results/clientpositive/auto_sortmerge_join_12.q.out index 3c30a31..7caa8ff 100644 --- a/ql/src/test/results/clientpositive/auto_sortmerge_join_12.q.out +++ b/ql/src/test/results/clientpositive/auto_sortmerge_join_12.q.out @@ -138,7 +138,7 @@ POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket3out POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_medium@ds=2008-04-08 -Warning: 
Map Join MAPJOIN[27][bigTable=?] in task 'Stage-2:MAPRED' is a cross product +Warning: Map Join MAPJOIN[32][bigTable=?] in task 'Stage-4:MAPRED' is a cross product PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key @@ -204,12 +204,12 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-8 is a root stage - Stage-2 depends on stages: Stage-8 - Stage-0 depends on stages: Stage-2 + Stage-11 is a root stage + Stage-4 depends on stages: Stage-11 + Stage-0 depends on stages: Stage-4 STAGE PLANS: - Stage: Stage-8 + Stage: Stage-11 Map Reduce Local Work Alias -> Map Local Tables: a @@ -263,7 +263,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_small name: default.bucket_small - b + c Fetch Operator limit: -1 Partition Description: @@ -275,22 +275,22 @@ STAGE PLANS: ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true - bucket_count 3 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_medium - numFiles 3 + name default.bucket_big + numFiles 4 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_medium { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 170 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -298,22 +298,69 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 3 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_medium + name default.bucket_big partition_columns ds partition_columns.types string - serialization.ddl struct bucket_medium { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_medium - name: default.bucket_medium + name: default.bucket_big + name: default.bucket_big + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big d Fetch Operator limit: -1 @@ -378,32 +425,28 @@ STAGE PLANS: HashTable Sink Operator condition expressions: 0 - 1 - 2 + 1 {key} keys: 0 key (type: string) 1 key (type: string) - 2 key (type: string) - Position of Big Table: 2 - b + Position of Big Table: 1 + c TableScan - alias: b - Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: NONE + alias: c + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator condition expressions: 0 1 - 2 keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - Position of Big Table: 2 + 0 _col6 (type: string), _col6 (type: string) + 1 key (type: string), key (type: string) + Position of Big Table: 0 d TableScan alias: d @@ -418,31 +461,29 @@ STAGE PLANS: 1 Position of Big Table: 0 - Stage: Stage-2 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 condition expressions: 0 - 1 - 2 + 1 {key} keys: 0 key (type: string) 1 key (type: string) - 2 key (type: string) - Position of Big Table: 2 - Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col6 + Position of Big Table: 1 + Statistics: Num rows: 1 Data size: 125 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -450,12 +491,21 @@ STAGE PLANS: 0 1 keys: - 0 - 1 + 0 _col6 (type: string), _col6 (type: string) + 1 key (type: string), key (type: string) Position of Big Table: 0 - Statistics: Num rows: 139 Data size: 14064 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 139 Data size: 14064 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 + 1 + Position of Big Table: 0 + Statistics: Num rows: 69 Data size: 7032 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -665,8 +715,7 @@ STAGE PLANS: name: 
default.bucket_small name: default.bucket_small Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [c] - /bucket_big/ds=2008-04-09 [c] + /bucket_medium/ds=2008-04-08 [b] Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -706,7 +755,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[27][bigTable=?] in task 'Stage-2:MAPRED' is a cross product +Warning: Map Join MAPJOIN[32][bigTable=?] in task 'Stage-4:MAPRED' is a cross product PREHOOK: query: select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key PREHOOK: type: QUERY PREHOOK: Input: default@bucket_big diff --git a/ql/src/test/results/clientpositive/auto_sortmerge_join_14.q.out b/ql/src/test/results/clientpositive/auto_sortmerge_join_14.q.out index 69bd43a..06bd543 100644 --- a/ql/src/test/results/clientpositive/auto_sortmerge_join_14.q.out +++ b/ql/src/test/results/clientpositive/auto_sortmerge_join_14.q.out @@ -85,14 +85,13 @@ STAGE PLANS: keys: 0 key (type: int) 1 key (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -125,14 +124,13 @@ STAGE PLANS: keys: 0 key (type: int) 1 key (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -226,14 +224,13 @@ STAGE PLANS: keys: 0 key (type: int) 1 key (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -266,14 +263,13 @@ STAGE PLANS: keys: 0 key (type: int) 1 key (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/auto_sortmerge_join_15.q.out b/ql/src/test/results/clientpositive/auto_sortmerge_join_15.q.out index 10b20d8..69a35bd 100644 --- a/ql/src/test/results/clientpositive/auto_sortmerge_join_15.q.out +++ b/ql/src/test/results/clientpositive/auto_sortmerge_join_15.q.out @@ -83,14 +83,13 @@ STAGE PLANS: keys: 0 key (type: int) 1 key (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + 
aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -123,14 +122,13 @@ STAGE PLANS: keys: 0 key (type: int) 1 key (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -201,14 +199,13 @@ STAGE PLANS: keys: 0 key (type: int) 1 key (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -241,14 +238,13 @@ STAGE PLANS: keys: 0 key (type: int) 1 key (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/auto_sortmerge_join_2.q.out b/ql/src/test/results/clientpositive/auto_sortmerge_join_2.q.out index 72242bb..9a00af8 100644 --- a/ql/src/test/results/clientpositive/auto_sortmerge_join_2.q.out +++ b/ql/src/test/results/clientpositive/auto_sortmerge_join_2.q.out @@ -147,16 +147,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 0 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -452,16 +451,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 0 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -780,16 +778,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 1 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + 
Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -991,16 +988,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 0 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: diff --git a/ql/src/test/results/clientpositive/auto_sortmerge_join_3.q.out b/ql/src/test/results/clientpositive/auto_sortmerge_join_3.q.out index 35fa02f..d4bdc32 100644 --- a/ql/src/test/results/clientpositive/auto_sortmerge_join_3.q.out +++ b/ql/src/test/results/clientpositive/auto_sortmerge_join_3.q.out @@ -147,16 +147,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 1 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -325,16 +324,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 0 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -625,16 +623,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 0 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -904,16 +901,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 1 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -1115,16 +1111,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 0 - Select Operator - Group By Operator - aggregations: 
count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: diff --git a/ql/src/test/results/clientpositive/auto_sortmerge_join_4.q.out b/ql/src/test/results/clientpositive/auto_sortmerge_join_4.q.out index 4fea70d..dffb432 100644 --- a/ql/src/test/results/clientpositive/auto_sortmerge_join_4.q.out +++ b/ql/src/test/results/clientpositive/auto_sortmerge_join_4.q.out @@ -163,16 +163,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 1 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -341,16 +340,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 0 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -641,16 +639,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 0 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -920,16 +917,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 1 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -1131,16 +1127,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 0 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 
(type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: diff --git a/ql/src/test/results/clientpositive/auto_sortmerge_join_5.q.out b/ql/src/test/results/clientpositive/auto_sortmerge_join_5.q.out index 1904cc2..66aa3f1 100644 --- a/ql/src/test/results/clientpositive/auto_sortmerge_join_5.q.out +++ b/ql/src/test/results/clientpositive/auto_sortmerge_join_5.q.out @@ -128,16 +128,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 1 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -296,16 +295,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 0 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -493,16 +491,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 0 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -664,16 +661,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 1 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -814,16 +810,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 0 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: diff --git a/ql/src/test/results/clientpositive/auto_sortmerge_join_6.q.out b/ql/src/test/results/clientpositive/auto_sortmerge_join_6.q.out index e5e2a6a..0ae629f 100644 --- 
a/ql/src/test/results/clientpositive/auto_sortmerge_join_6.q.out +++ b/ql/src/test/results/clientpositive/auto_sortmerge_join_6.q.out @@ -89,26 +89,15 @@ POSTHOOK: query: -- A SMB join is being followed by a regular join on a non-buck explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src c on c.value = a.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -128,6 +117,17 @@ STAGE PLANS: key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) + TableScan + alias: c + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -135,19 +135,18 @@ STAGE PLANS: condition expressions: 0 1 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -208,17 +207,6 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: d - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -238,6 +226,17 @@ STAGE PLANS: key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) + 
+          TableScan
+            alias: d
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: value is not null (type: boolean)
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: value (type: string)
+                sort order: +
+                Map-reduce partition columns: value (type: string)
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
       Reduce Operator Tree:
         Join Operator
          condition map:
@@ -245,17 +244,16 @@ STAGE PLANS:
           condition expressions:
             0 
             1 
-          Select Operator
-            Group By Operator
-              aggregations: count()
-              mode: hash
-              outputColumnNames: _col0
-              File Output Operator
-                compressed: false
-                table:
-                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          Group By Operator
+            aggregations: count()
+            mode: hash
+            outputColumnNames: _col0
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-2
     Map Reduce
@@ -310,8 +308,8 @@ explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src h on h
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
-  Stage-3 depends on stages: Stage-1
-  Stage-0 depends on stages: Stage-3
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-1
@@ -355,19 +353,18 @@ STAGE PLANS:
           condition expressions:
             0 
            1 
-          Select Operator
-            Group By Operator
-              aggregations: count()
-              mode: hash
-              outputColumnNames: _col0
-              File Output Operator
-                compressed: false
-                table:
-                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          Group By Operator
+            aggregations: count()
+            mode: hash
+            outputColumnNames: _col0
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
-  Stage: Stage-3
+  Stage: Stage-2
     Map Reduce
       Map Operator Tree:
           TableScan
@@ -488,17 +485,16 @@ STAGE PLANS:
                   0 UDFToDouble(key) (type: double)
                   1 UDFToDouble(key) (type: double)
                   2 UDFToDouble(key) (type: double)
-                Select Operator
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
-                    File Output Operator
-                      compressed: false
-                      table:
-                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  File Output Operator
+                    compressed: false
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
       Local Work:
         Map Reduce Local Work
 
@@ -586,17 +582,16 @@ STAGE PLANS:
                   0 UDFToDouble(key) (type: double)
                   1 UDFToDouble(key) (type: double)
                   2 UDFToDouble(key) (type: double)
-                Select Operator
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
-                    File Output Operator
-                      compressed: false
-                      table:
-                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  File Output Operator
+                    compressed: false
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
       Local Work:
         Map Reduce Local Work
 
@@ -658,17 +653,16 @@ STAGE PLANS:
                   0 UDFToDouble(key) (type: double)
                   1 UDFToDouble(key) (type: double)
                   2 UDFToDouble(key) (type: double)
-                Select Operator
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
-                    File Output Operator
-                      compressed: false
-                      table:
-                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  File Output Operator
+                    compressed: false
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
       Local Work:
         Map Reduce Local Work
 
@@ -676,7 +670,7 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
           TableScan
-            alias: b
+            alias: a
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: UDFToDouble(key) is not null (type: boolean)
@@ -687,7 +681,7 @@ STAGE PLANS:
                 Map-reduce partition columns: UDFToDouble(key) (type: double)
                 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
           TableScan
-            alias: c
+            alias: b
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: UDFToDouble(key) is not null (type: boolean)
@@ -698,7 +692,7 @@ STAGE PLANS:
                 Map-reduce partition columns: UDFToDouble(key) (type: double)
                 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
           TableScan
-            alias: a
+            alias: c
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: UDFToDouble(key) is not null (type: boolean)
@@ -718,19 +712,17 @@ STAGE PLANS:
             1 
             2 
           Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
-            Group By Operator
-              aggregations: count()
-              mode: hash
-              outputColumnNames: _col0
-              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-              File Output Operator
-                compressed: false
-                table:
-                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          Group By Operator
+            aggregations: count()
+            mode: hash
+            outputColumnNames: _col0
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Fetch Operator
@@ -783,14 +775,13 @@ STAGE PLANS:
                   0 key (type: int)
                   1 key (type: int)
                   2 key (type: int)
-                Select Operator
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
-                    Reduce Output Operator
-                      sort order: 
-                      value expressions: _col0 (type: bigint)
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Reduce Output Operator
+                    sort order: 
+                    value expressions: _col0 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
           aggregations: count(VALUE._col0)
@@ -832,26 +823,15 @@ POSTHOOK: query: -- A SMB join is being followed by a regular join on a bucketed
 explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join tbl4 c on c.value = a.value
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-2 is a root stage
-  Stage-3 depends on stages: Stage-2
-  Stage-0 depends on stages: Stage-3
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
 
 STAGE PLANS:
-  Stage: Stage-2
+  Stage: Stage-1
     Map Reduce
      Map Operator Tree:
          TableScan
-            alias: c
-            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: value is not null (type: boolean)
-              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-              Reduce Output Operator
-                key expressions: value (type: string)
-                sort order: +
-                Map-reduce partition columns: value (type: string)
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-          TableScan
             alias: a
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
@@ -871,6 +851,17 @@ STAGE PLANS:
                 key expressions: _col1 (type: string)
                 sort order: +
                 Map-reduce partition columns: _col1 (type: string)
+          TableScan
+            alias: c
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: value is not null (type: boolean)
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: value (type: string)
+                sort order: +
+                Map-reduce partition columns: value (type: string)
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
       Reduce Operator Tree:
         Join Operator
           condition map:
@@ -878,19 +869,18 @@ STAGE PLANS:
           condition expressions:
             0 
             1 
-          Select Operator
-            Group By Operator
-              aggregations: count()
-              mode: hash
-              outputColumnNames: _col0
-              File Output Operator
-                compressed: false
-                table:
-                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          Group By Operator
+            aggregations: count()
+            mode: hash
+            outputColumnNames: _col0
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
-  Stage: Stage-3
+  Stage: Stage-2
     Map Reduce
       Map Operator Tree:
           TableScan
@@ -938,26 +928,15 @@ POSTHOOK: query: -- A SMB join is being followed by a regular join on a non-buck
 explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src c on c.value = a.value
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-2 is a root stage
-  Stage-3 depends on stages: Stage-2
-  Stage-0 depends on stages: Stage-3
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
 
 STAGE PLANS:
-  Stage: Stage-2
+  Stage: Stage-1
     Map Reduce
       Map Operator Tree:
          TableScan
-            alias: c
-            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: value is not null (type: boolean)
-              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-              Reduce Output Operator
-                key expressions: value (type: string)
-                sort order: +
-                Map-reduce partition columns: value (type: string)
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-          TableScan
             alias: a
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
@@ -977,6 +956,17 @@ STAGE PLANS:
                 key expressions: _col1 (type: string)
                 sort order: +
                 Map-reduce partition columns: _col1 (type: string)
+          TableScan
+            alias: c
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: value is not null (type: boolean)
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: value (type: string)
+                sort order: +
+                Map-reduce partition columns: value (type: string)
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
       Reduce Operator Tree:
         Join Operator
           condition map:
@@ -984,19 +974,18 @@ STAGE PLANS:
           condition expressions:
             0 
             1 
-          Select Operator
-            Group By Operator
-              aggregations: count()
-              mode: hash
-              outputColumnNames: _col0
-              File Output Operator
-                compressed: false
-                table:
-                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          Group By Operator
+            aggregations: count()
+            mode: hash
+            outputColumnNames: _col0
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
-  Stage: Stage-3
+  Stage: Stage-2
     Map Reduce
       Map Operator Tree:
          TableScan
@@ -1117,17 +1106,16 @@ STAGE PLANS:
                   0 UDFToDouble(key) (type: double)
                   1 UDFToDouble(key) (type: double)
                   2 UDFToDouble(key) (type: double)
-                Select Operator
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
-                    File Output Operator
-                      compressed: false
-                      table:
-                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  File Output Operator
+                    compressed: false
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
       Local Work:
         Map Reduce Local Work
 
@@ -1215,17 +1203,16 @@ STAGE PLANS:
                   0 UDFToDouble(key) (type: double)
                   1 UDFToDouble(key) (type: double)
                   2 UDFToDouble(key) (type: double)
-                Select Operator
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
-                    File Output Operator
-                      compressed: false
-                      table:
-                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  File Output Operator
+                    compressed: false
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
       Local Work:
         Map Reduce Local Work
 
@@ -1287,17 +1274,16 @@ STAGE PLANS:
                   0 UDFToDouble(key) (type: double)
                   1 UDFToDouble(key) (type: double)
                   2 UDFToDouble(key) (type: double)
-                Select Operator
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
-                    File Output Operator
-                      compressed: false
-                      table:
-                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  File Output Operator
+                    compressed: false
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
       Local Work:
         Map Reduce Local Work
 
@@ -1305,7 +1291,7 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
           TableScan
-            alias: b
+            alias: a
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: UDFToDouble(key) is not null (type: boolean)
@@ -1316,7 +1302,7 @@ STAGE PLANS:
                 Map-reduce partition columns: UDFToDouble(key) (type: double)
                 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
           TableScan
-            alias: c
+            alias: b
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: UDFToDouble(key) is not null (type: boolean)
@@ -1327,7 +1313,7 @@ STAGE PLANS:
                 Map-reduce partition columns: UDFToDouble(key) (type: double)
                 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
           TableScan
-            alias: a
+            alias: c
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: UDFToDouble(key) is not null (type: boolean)
@@ -1347,19 +1333,17 @@ STAGE PLANS:
             1 
             2 
           Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
-            Group By Operator
-              aggregations: count()
-              mode: hash
-              outputColumnNames: _col0
-              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-              File Output Operator
-                compressed: false
-                table:
-                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          Group By Operator
+            aggregations: count()
+            mode: hash
+            outputColumnNames: _col0
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Fetch Operator
@@ -1459,14 +1443,13 @@ STAGE PLANS:
                   0 key (type: int)
                   1 key (type: int)
                   2 key (type: int)
-                Select Operator
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
-                    Reduce Output Operator
-                      sort order: 
-                      value expressions: _col0 (type: bigint)
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Reduce Output Operator
+                    sort order: 
+                    value expressions: _col0 (type: bigint)
       Local Work:
         Map Reduce Local Work
       Reduce Operator Tree:
@@ -1542,14 +1525,13 @@ STAGE PLANS:
                   0 key (type: int)
                   1 key (type: int)
                   2 key (type: int)
-                Select Operator
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
-                    Reduce Output Operator
-                      sort order: 
-                      value expressions: _col0 (type: bigint)
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Reduce Output Operator
+                    sort order: 
+                    value expressions: _col0 (type: bigint)
       Local Work:
         Map Reduce Local Work
      Reduce Operator Tree:
@@ -1625,14 +1607,13 @@ STAGE PLANS:
                   0 key (type: int)
                   1 key (type: int)
                   2 key (type: int)
-                Select Operator
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
-                    Reduce Output Operator
-                      sort order: 
-                      value expressions: _col0 (type: bigint)
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Reduce Output Operator
+                    sort order: 
+                    value expressions: _col0 (type: bigint)
       Local Work:
         Map Reduce Local Work
       Reduce Operator Tree:
@@ -1671,14 +1652,13 @@ STAGE PLANS:
                   0 key (type: int)
                   1 key (type: int)
                   2 key (type: int)
-                Select Operator
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
-                    Reduce Output Operator
-                      sort order: 
-                      value expressions: _col0 (type: bigint)
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Reduce Output Operator
+                    sort order: 
+                    value expressions: _col0 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
          aggregations: count(VALUE._col0)
@@ -1720,26 +1700,15 @@ POSTHOOK: query: -- A SMB join is being followed by a regular join on a bucketed
 explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join tbl4 c on c.value = a.value
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-2 is a root stage
-  Stage-3 depends on stages: Stage-2
-  Stage-0 depends on stages: Stage-3
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
 
 STAGE PLANS:
-  Stage: Stage-2
+  Stage: Stage-1
     Map Reduce
       Map Operator Tree:
          TableScan
-            alias: c
-            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: value is not null (type: boolean)
-              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-              Reduce Output Operator
-                key expressions: value (type: string)
-                sort order: +
-                Map-reduce partition columns: value (type: string)
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-          TableScan
             alias: a
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
@@ -1759,6 +1728,17 @@ STAGE PLANS:
                 key expressions: _col1 (type: string)
                 sort order: +
                 Map-reduce partition columns: _col1 (type: string)
+          TableScan
+            alias: c
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: value is not null (type: boolean)
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: value (type: string)
+                sort order: +
+                Map-reduce partition columns: value (type: string)
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
       Reduce Operator Tree:
         Join Operator
           condition map:
@@ -1766,19 +1746,18 @@ STAGE PLANS:
           condition expressions:
             0 
             1 
-          Select Operator
-            Group By Operator
-              aggregations: count()
-              mode: hash
-              outputColumnNames: _col0
-              File Output Operator
-                compressed: false
-                table:
-                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          Group By Operator
+            aggregations: count()
+            mode: hash
+            outputColumnNames: _col0
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
-  Stage: Stage-3
+  Stage: Stage-2
     Map Reduce
       Map Operator Tree:
           TableScan
diff --git a/ql/src/test/results/clientpositive/auto_sortmerge_join_7.q.out b/ql/src/test/results/clientpositive/auto_sortmerge_join_7.q.out
index abb1db4..d3701a1 100644
--- a/ql/src/test/results/clientpositive/auto_sortmerge_join_7.q.out
+++ b/ql/src/test/results/clientpositive/auto_sortmerge_join_7.q.out
@@ -180,16 +180,15 @@ STAGE PLANS:
                   0 key (type: string)
                   1 key (type: string)
                 Position of Big Table: 1
-                Select Operator
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
-                    Reduce Output Operator
-                      sort order: 
-                      tag: -1
-                      value expressions: _col0 (type: bigint)
-                      auto parallelism: false
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Reduce Output Operator
+                    sort order: 
+                    tag: -1
+                    value expressions: _col0 (type: bigint)
+                    auto parallelism: false
       Path -> Alias:
 #### A masked pattern was here ####
       Path -> Partition:
@@ -409,16 +408,15 @@ STAGE PLANS:
                   0 key (type: string)
                   1 key (type: string)
                 Position of Big Table: 0
-                Select Operator
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
-                    Reduce Output Operator
-                      sort order: 
-                      tag: -1
-                      value expressions: _col0 (type: bigint)
-                      auto parallelism: false
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Reduce Output Operator
+                    sort order: 
+                    tag: -1
+                    value expressions: _col0 (type: bigint)
+                    auto parallelism: false
       Path -> Alias:
 #### A masked pattern was here ####
       Path -> Partition:
@@ -760,16 +758,15 @@ STAGE PLANS:
                   0 key (type: string)
                   1 key (type: string)
                 Position of Big Table: 0
-                Select Operator
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
-                    Reduce Output Operator
-                      sort order: 
-                      tag: -1
-                      value expressions: _col0 (type: bigint)
-                      auto parallelism: false
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Reduce Output Operator
+                    sort order: 
+                    tag: -1
+                    value expressions: _col0 (type: bigint)
+                    auto parallelism: false
       Local Work:
         Map Reduce Local Work
       Path -> Alias:
@@ -1135,16 +1132,15 @@ STAGE PLANS:
                   0 key (type: string)
                   1 key (type: string)
                 Position of Big Table: 1
-                Select Operator
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
-                    Reduce Output Operator
-                      sort order: 
-                      tag: -1
-                      value expressions: _col0 (type: bigint)
-                      auto parallelism: false
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Reduce Output Operator
+                    sort order: 
+                    tag: -1
+                    value expressions: _col0 (type: bigint)
+                    auto parallelism: false
       Local Work:
         Map Reduce Local Work
       Path -> Alias:
@@ -1394,16 +1390,15 @@ STAGE PLANS:
                   0 key (type: string)
                   1 key (type: string)
                 Position of Big Table: 0
-                Select Operator
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
-                    Reduce Output Operator
-                      sort order: 
-                      tag: -1
-                      value expressions: _col0 (type: bigint)
-                      auto parallelism: false
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Reduce Output Operator
+                    sort order: 
+                    tag: -1
+                    value expressions: _col0 (type: bigint)
+                    auto parallelism: false
       Path -> Alias:
 #### A masked pattern was here ####
       Path -> Partition:
diff --git a/ql/src/test/results/clientpositive/auto_sortmerge_join_8.q.out b/ql/src/test/results/clientpositive/auto_sortmerge_join_8.q.out
index 9226dc6..f53a2f2 100644
--- a/ql/src/test/results/clientpositive/auto_sortmerge_join_8.q.out
+++ b/ql/src/test/results/clientpositive/auto_sortmerge_join_8.q.out
@@ -180,16 +180,15 @@ STAGE PLANS:
                   0 key (type: string)
                   1 key (type: string)
                 Position of Big Table: 1
-                Select Operator
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
-                    Reduce Output Operator
-                      sort order: 
-                      tag: -1
-                      value expressions: _col0 (type: bigint)
-                      auto parallelism: false
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Reduce Output Operator
+                    sort order: 
+                    tag: -1
+                    value expressions: _col0 (type: bigint)
+                    auto parallelism: false
       Path -> Alias:
 #### A masked pattern was here ####
       Path -> Partition:
@@ -409,16 +408,15 @@ STAGE PLANS:
                   0 key (type: string)
                   1 key (type: string)
                 Position of Big Table: 0
-                Select Operator
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
-                    Reduce Output Operator
-                      sort order: 
-                      tag: -1
-                      value expressions: _col0 (type: bigint)
-                      auto parallelism: false
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Reduce Output Operator
+                    sort order: 
+                    tag: -1
+                    value expressions: _col0 (type: bigint)
+                    auto parallelism: false
       Path -> Alias:
 #### A masked pattern was here ####
       Path -> Partition:
@@ -762,16 +760,15 @@ STAGE PLANS:
                   0 key (type: string)
                   1 key (type: string)
                 Position of Big Table: 0
-                Select Operator
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
-                    Reduce Output Operator
-                      sort order: 
-                      tag: -1
-                      value expressions: _col0 (type: bigint)
-                      auto parallelism: false
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Reduce Output Operator
+                    sort order: 
+                    tag: -1
+                    value expressions: _col0 (type: bigint)
+                    auto parallelism: false
       Local Work:
         Map Reduce Local Work
       Path -> Alias:
@@ -1137,16 +1134,15 @@ STAGE PLANS:
                   0 key (type: string)
                   1 key (type: string)
                 Position of Big Table: 1
-                Select Operator
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
-                    Reduce Output Operator
-                      sort order: 
-                      tag: -1
-                      value expressions: _col0 (type: bigint)
-                      auto parallelism: false
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Reduce Output Operator
+                    sort order: 
+                    tag: -1
+                    value expressions: _col0 (type: bigint)
+                    auto parallelism: false
       Local Work:
         Map Reduce Local Work
       Path -> Alias:
@@ -1396,16 +1392,15 @@ STAGE PLANS:
                   0 key (type: string)
                   1 key (type: string)
                 Position of Big Table: 0
-                Select Operator
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
-                    Reduce Output Operator
-                      sort order: 
-                      tag: -1
-                      value expressions: _col0 (type: bigint)
-                      auto parallelism: false
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Reduce Output Operator
+                    sort order: 
+                    tag: -1
+                    value expressions: _col0 (type: bigint)
+                    auto parallelism: false
       Path -> Alias:
 #### A masked pattern was here ####
       Path -> Partition:
diff --git a/ql/src/test/results/clientpositive/auto_sortmerge_join_9.q.out b/ql/src/test/results/clientpositive/auto_sortmerge_join_9.q.out
index 1a7fdf9..383af91 100644
--- a/ql/src/test/results/clientpositive/auto_sortmerge_join_9.q.out
+++ b/ql/src/test/results/clientpositive/auto_sortmerge_join_9.q.out
@@ -77,14 +77,13 @@ STAGE PLANS:
                 keys:
                   0 key (type: int)
                   1 key (type: int)
-                Select Operator
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
-                    Reduce Output Operator
-                      sort order: 
-                      value expressions: _col0 (type: bigint)
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Reduce Output Operator
+                    sort order: 
+                    value expressions: _col0 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
          aggregations: count(VALUE._col0)
@@ -161,19 +160,16 @@ STAGE PLANS:
                   0 key (type: int)
                   1 key (type: int)
                 outputColumnNames: _col0
-                Select Operator
-                  expressions: _col0 (type: int)
-                  outputColumnNames: _col0
-                  Group By Operator
-                    aggregations: count()
-                    keys: _col0 (type: int)
-                    mode: hash
-                    outputColumnNames: _col0, _col1
-                    Reduce Output Operator
-                      key expressions: _col0 (type: int)
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: int)
-                      value expressions: _col1 (type: bigint)
+                Group By Operator
+                  aggregations: count()
+                  keys: _col0 (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: int)
+                    value expressions: _col1 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
          aggregations: count(VALUE._col0)
@@ -244,8 +240,8 @@ select count(*) from
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
-  Stage-3 depends on stages: Stage-1
-  Stage-0 depends on stages: Stage-3
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-1
@@ -267,19 +263,16 @@ STAGE PLANS:
                   0 key (type: int)
                   1 key (type: int)
                 outputColumnNames: _col0
-                Select Operator
-                  expressions: _col0 (type: int)
-                  outputColumnNames: _col0
-                  Group By Operator
-                    aggregations: count()
-                    keys: _col0 (type: int)
-                    mode: hash
-                    outputColumnNames: _col0, _col1
-                    Reduce Output Operator
-                      key expressions: _col0 (type: int)
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: int)
-                      value expressions: _col1 (type: bigint)
+                Group By Operator
+                  aggregations: count()
+                  keys: _col0 (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: int)
+                    value expressions: _col1 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
          aggregations: count(VALUE._col0)
@@ -298,7 +291,7 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
-  Stage: Stage-3
+  Stage: Stage-2
     Map Reduce
       Map Operator Tree:
           TableScan
@@ -391,14 +384,14 @@ on src1.key = src2.key
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
-  Stage-8 depends on stages: Stage-1, Stage-4 , consists of Stage-9, Stage-10, Stage-3
-  Stage-9 has a backup stage: Stage-3
+  Stage-8 depends on stages: Stage-1, Stage-4 , consists of Stage-9, Stage-10, Stage-2
+  Stage-9 has a backup stage: Stage-2
   Stage-6 depends on stages: Stage-9
-  Stage-10 has a backup stage: Stage-3
+  Stage-10 has a backup stage: Stage-2
   Stage-7 depends on stages: Stage-10
-  Stage-3
+  Stage-2
   Stage-4 is a root stage
-  Stage-0 depends on stages: Stage-6, Stage-7, Stage-3
+  Stage-0 depends on stages: Stage-6, Stage-7, Stage-2
 
 STAGE PLANS:
   Stage: Stage-1
@@ -420,34 +413,28 @@ STAGE PLANS:
                   0 key (type: int)
                   1 key (type: int)
                 outputColumnNames: _col0
-                Select Operator
-                  expressions: _col0 (type: int)
-                  outputColumnNames: _col0
-                  Group By Operator
-                    aggregations: count()
-                    keys: _col0 (type: int)
-                    mode: hash
-                    outputColumnNames: _col0, _col1
-                    Reduce Output Operator
-                      key expressions: _col0 (type: int)
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: int)
-                      value expressions: _col1 (type: bigint)
+                Group By Operator
+                  aggregations: count()
+                  keys: _col0 (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: int)
+                    value expressions: _col1 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
           aggregations: count(VALUE._col0)
           keys: KEY._col0 (type: int)
           mode: mergepartial
           outputColumnNames: _col0, _col1
-          Select Operator
-            expressions: _col0 (type: int), _col1 (type: bigint)
-            outputColumnNames: _col0, _col1
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-8
     Conditional Operator
@@ -538,7 +525,7 @@ STAGE PLANS:
       Local Work:
         Map Reduce Local Work
 
-  Stage: Stage-3
+  Stage: Stage-2
     Map Reduce
       Map Operator Tree:
           TableScan
@@ -590,34 +577,28 @@ STAGE PLANS:
                   0 key (type: int)
                   1 key (type: int)
                 outputColumnNames: _col0
-                Select Operator
-                  expressions: _col0 (type: int)
-                  outputColumnNames: _col0
-                  Group By Operator
-                    aggregations: count()
-                    keys: _col0 (type: int)
-                    mode: hash
-                    outputColumnNames: _col0, _col1
-                    Reduce Output Operator
-                      key expressions: _col0 (type: int)
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: int)
-                      value expressions: _col1 (type: bigint)
+                Group By Operator
+                  aggregations: count()
+                  keys: _col0 (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: int)
+                    value expressions: _col1 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
           aggregations: count(VALUE._col0)
           keys: KEY._col0 (type: int)
           mode: mergepartial
           outputColumnNames: _col0, _col1
-          Select Operator
-            expressions: _col0 (type: int), _col1 (type: bigint)
-            outputColumnNames: _col0, _col1
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Fetch Operator
@@ -714,14 +695,13 @@ STAGE PLANS:
                 keys:
                   0 _col0 (type: int)
                   1 _col0 (type: int)
-                Select Operator
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
-                    Reduce Output Operator
-                      sort order: 
-                      value expressions: _col0 (type: bigint)
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Reduce Output Operator
+                    sort order: 
+                    value expressions: _col0 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
          aggregations: count(VALUE._col0)
@@ -817,14 +797,13 @@ STAGE PLANS:
                 keys:
                   0 _col0 (type: int)
                   1 key (type: int)
-                Select Operator
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
-                    Reduce Output Operator
-                      sort order: 
-                      value expressions: _col0 (type: bigint)
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Reduce Output Operator
+                    sort order: 
+                    value expressions: _col0 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
          aggregations: count(VALUE._col0)
@@ -944,14 +923,13 @@ STAGE PLANS:
                 keys:
                   0 _col0 (type: int)
                   1 _col0 (type: int)
-                Select Operator
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
-                    Reduce Output Operator
-                      sort order: 
-                      value expressions: _col0 (type: bigint)
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Reduce Output Operator
+                    sort order: 
+                    value expressions: _col0 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
          aggregations: count(VALUE._col0)
@@ -1061,14 +1039,13 @@ STAGE PLANS:
                 keys:
                   0 _col0 (type: int)
                   1 _col0 (type: int)
-                Select Operator
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
-                    Reduce Output Operator
-                      sort order: 
-                      value expressions: _col0 (type: bigint)
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Reduce Output Operator
+                    sort order: 
+                    value expressions: _col0 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
          aggregations: count(VALUE._col0)
@@ -1182,17 +1159,15 @@ STAGE PLANS:
                   0 _col0 (type: int)
                   1 _col0 (type: int)
                 Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
                     Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      sort order: 
-                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col0 (type: bigint)
+                    value expressions: _col0 (type: bigint)
       Local Work:
         Map Reduce Local Work
       Reduce Operator Tree:
@@ -1279,14 +1254,13 @@ STAGE PLANS:
                 keys:
                   0 _col0 (type: int)
                   1 key (type: int)
-                Select Operator
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
-                    Reduce Output Operator
-                      sort order: 
-                      value expressions: _col0 (type: bigint)
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Reduce Output Operator
+                    sort order: 
+                    value expressions: _col0 (type: bigint)
       Reduce Operator Tree:
        Group By Operator
          aggregations: count(VALUE._col0)
@@ -1362,14 +1336,13 @@ STAGE PLANS:
                 keys:
                   0 key (type: int)
                   1 _col0 (type: int)
-                Select Operator
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
-                    Reduce Output Operator
-                      sort order: 
-                      value expressions: _col0 (type: bigint)
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Reduce Output Operator
+                    sort order: 
+                    value expressions: _col0 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
          aggregations: count(VALUE._col0)
@@ -1462,14 +1435,13 @@ STAGE PLANS:
                   0 _col0 (type: int)
                   1 _col0 (type: int)
                   2 _col0 (type: int)
-                Select Operator
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
-                    Reduce Output Operator
-                      sort order: 
-                      value expressions: _col0 (type: bigint)
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Reduce Output Operator
+                    sort order: 
+                    value expressions: _col0 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
          aggregations: count(VALUE._col0)
@@ -1573,14 +1545,13 @@ STAGE PLANS:
                 keys:
                   0 _col0 (type: int)
                   1 key (type: int)
-                Select Operator
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
-                    Reduce Output Operator
-                      sort order: 
-                      value expressions: _col0 (type: bigint)
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Reduce Output Operator
+                    sort order: 
+                    value expressions: _col0 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
          aggregations: count(VALUE._col0)
@@ -1694,14 +1665,13 @@ STAGE PLANS:
                 keys:
                   0 key (type: int)
                   1 key (type: int)
-                Select Operator
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
-                    Reduce Output Operator
-                      sort order: 
-                      value expressions: _col0 (type: bigint)
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Reduce Output Operator
+                    sort order: 
+                    value expressions: _col0 (type: bigint)
       Local Work:
         Map Reduce Local Work
       Reduce Operator Tree:
@@ -1755,14 +1725,13 @@ STAGE PLANS:
                 keys:
                   0 key (type: int)
                   1 key (type: int)
-                Select Operator
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
-                    Reduce Output Operator
-                      sort order: 
-                      value expressions: _col0 (type: bigint)
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Reduce Output Operator
+                    sort order: 
+                    value expressions: _col0 (type: bigint)
       Local Work:
         Map Reduce Local Work
      Reduce Operator Tree:
@@ -1798,14 +1767,13 @@ STAGE PLANS:
                 keys:
                   0 key (type: int)
                   1 key (type: int)
-                Select Operator
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
-                    Reduce Output Operator
-                      sort order: 
-                      value expressions: _col0 (type: bigint)
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Reduce Output Operator
+                    sort order: 
+                    value expressions: _col0 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
          aggregations: count(VALUE._col0)
@@ -1908,19 +1876,16 @@ STAGE PLANS:
                   0 key (type: int)
                   1 key (type: int)
                 outputColumnNames: _col0
-                Select Operator
-                  expressions: _col0 (type: int)
-                  outputColumnNames: _col0
-                  Group By Operator
-                    aggregations: count()
-                    keys: _col0 (type: int)
-                    mode: hash
-                    outputColumnNames: _col0, _col1
-                    Reduce Output Operator
-                      key expressions: _col0 (type: int)
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: int)
-                      value expressions: _col1 (type: bigint)
+                Group By Operator
+                  aggregations: count()
+                  keys: _col0 (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: int)
+                    value expressions: _col1 (type: bigint)
       Local Work:
         Map Reduce Local Work
       Reduce Operator Tree:
@@ -1976,19 +1941,16 @@ STAGE PLANS:
                   0 key (type: int)
                   1 key (type: int)
                 outputColumnNames: _col0
-                Select Operator
-                  expressions: _col0 (type: int)
-                  outputColumnNames: _col0
-                  Group By Operator
-                    aggregations: count()
-                    keys: _col0 (type: int)
-                    mode: hash
-                    outputColumnNames: _col0, _col1
-                    Reduce Output Operator
-                      key expressions: _col0 (type: int)
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: int)
-                      value expressions: _col1 (type: bigint)
+                Group By Operator
+                  aggregations: count()
+                  keys: _col0 (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: int)
+                    value expressions: _col1 (type: bigint)
       Local Work:
         Map Reduce Local Work
      Reduce Operator Tree:
@@ -2026,19 +1988,16 @@ STAGE PLANS:
                   0 key (type: int)
                   1 key (type: int)
                 outputColumnNames: _col0
-                Select Operator
-                  expressions: _col0 (type: int)
-                  outputColumnNames: _col0
-                  Group By Operator
-                    aggregations: count()
-                    keys: _col0 (type: int)
-                    mode: hash
-                    outputColumnNames: _col0, _col1
-                    Reduce Output Operator
-                      key expressions: _col0 (type: int)
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: int)
-                      value expressions: _col1 (type: bigint)
+                Group By Operator
+                  aggregations: count()
+                  keys: _col0 (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: int)
+                    value expressions: _col1 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
          aggregations: count(VALUE._col0)
@@ -2111,11 +2070,11 @@ STAGE DEPENDENCIES:
   Stage-6 is a root stage , consists of Stage-7, Stage-8, Stage-1
   Stage-7 has a backup stage: Stage-1
   Stage-4 depends on stages: Stage-7
-  Stage-3 depends on stages: Stage-1, Stage-4, Stage-5
+  Stage-2 depends on stages: Stage-1, Stage-4, Stage-5
   Stage-8 has a backup stage: Stage-1
   Stage-5 depends on stages: Stage-8
   Stage-1
-  Stage-0 depends on stages: Stage-3
+  Stage-0 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-6
@@ -2158,19 +2117,16 @@ STAGE PLANS:
                   0 key (type: int)
                   1 key (type: int)
                 outputColumnNames: _col0
-                Select Operator
-                  expressions: _col0 (type: int)
-                  outputColumnNames: _col0
-                  Group By Operator
-                    aggregations: count()
-                    keys: _col0 (type: int)
-                    mode: hash
-                    outputColumnNames: _col0, _col1
-                    Reduce Output Operator
-                      key expressions: _col0 (type: int)
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: int)
-                      value expressions: _col1 (type: bigint)
+                Group By Operator
+                  aggregations: count()
+                  keys: _col0 (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: int)
+                    value expressions: _col1 (type: bigint)
       Local Work:
         Map Reduce Local Work
       Reduce Operator Tree:
@@ -2191,7 +2147,7 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
-  Stage: Stage-3
+  Stage: Stage-2
     Map Reduce
       Map Operator Tree:
          TableScan
@@ -2250,19 +2206,16 @@ STAGE PLANS:
                   0 key (type: int)
                   1 key (type: int)
                 outputColumnNames: _col0
-                Select Operator
-                  expressions: _col0 (type: int)
-                  outputColumnNames: _col0
-                  Group By Operator
-                    aggregations: count()
-                    keys: _col0 (type: int)
-                    mode: hash
-                    outputColumnNames: _col0, _col1
-                    Reduce Output Operator
-                      key expressions: _col0 (type: int)
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: int)
-                      value expressions: _col1 (type: bigint)
+                Group By Operator
+                  aggregations: count()
+                  keys: _col0 (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: int)
+                    value expressions: _col1 (type: bigint)
       Reduce Operator Tree:
        Group By Operator
          aggregations: count(VALUE._col0)
@@ -2406,12 +2356,12 @@ STAGE DEPENDENCIES:
   Stage-11 is a root stage , consists of Stage-17, Stage-18, Stage-1
   Stage-17 has a backup stage: Stage-1
   Stage-9 depends on stages: Stage-17
-  Stage-8 depends on stages: Stage-1, Stage-4, Stage-9, Stage-10, Stage-12, Stage-13 , consists of Stage-15, Stage-16, Stage-3
-  Stage-15 has a backup stage: Stage-3
+  Stage-8 depends on stages: Stage-1, Stage-4, Stage-9, Stage-10, Stage-12, Stage-13 , consists of Stage-15, Stage-16, Stage-2
+  Stage-15 has a backup stage: Stage-2
   Stage-6 depends on stages: Stage-15
-  Stage-16 has a backup stage: Stage-3
+  Stage-16 has a backup stage: Stage-2
   Stage-7 depends on stages: Stage-16
-  Stage-3
+  Stage-2
   Stage-18 has a backup stage: Stage-1
   Stage-10 depends on stages: Stage-18
   Stage-1
@@ -2421,7 +2371,7 @@ STAGE DEPENDENCIES:
   Stage-20 has a backup stage: Stage-4
   Stage-13 depends on stages: Stage-20
   Stage-4
-  Stage-0 depends on stages: Stage-6, Stage-7, Stage-3
+  Stage-0 depends on stages: Stage-6, Stage-7, Stage-2
 
 STAGE PLANS:
   Stage: Stage-11
@@ -2464,19 +2414,16 @@ STAGE PLANS:
                   0 key (type: int)
                   1 key (type: int)
                 outputColumnNames: _col0
-                Select Operator
-                  expressions: _col0 (type: int)
-                  outputColumnNames: _col0
-                  Group By Operator
-                    aggregations: count()
-                    keys: _col0 (type: int)
-                    mode: hash
-                    outputColumnNames: _col0, _col1
-                    Reduce Output Operator
-                      key expressions: _col0 (type: int)
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: int)
-                      value expressions: _col1 (type: bigint)
+                Group By Operator
+                  aggregations: count()
+                  keys: _col0 (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: int)
+                    value expressions: _col1 (type: bigint)
       Local Work:
         Map Reduce Local Work
      Reduce Operator Tree:
@@ -2485,15 +2432,12 @@ STAGE PLANS:
           keys: KEY._col0 (type: int)
           mode: mergepartial
           outputColumnNames: _col0, _col1
-          Select Operator
-            expressions: _col0 (type: int), _col1 (type: bigint)
-            outputColumnNames: _col0, _col1
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-8
     Conditional Operator
@@ -2584,7 +2528,7 @@ STAGE PLANS:
       Local Work:
         Map Reduce Local Work
 
-  Stage: Stage-3
+  Stage: Stage-2
     Map Reduce
       Map Operator Tree:
          TableScan
@@ -2654,19 +2598,16 @@ STAGE PLANS:
                   0 key (type: int)
                   1 key (type: int)
                 outputColumnNames: _col0
-                Select Operator
-                  expressions: _col0 (type: int)
-                  outputColumnNames: _col0
-                  Group By Operator
-                    aggregations: count()
-                    keys: _col0 (type: int)
-                    mode: hash
-                    outputColumnNames: _col0, _col1
-                    Reduce Output Operator
-                      key expressions: _col0 (type: int)
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: int)
-                      value expressions: _col1 (type: bigint)
+                Group By Operator
+                  aggregations: count()
+                  keys: _col0 (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: int)
+                    value expressions: _col1 (type: bigint)
       Local Work:
        Map Reduce Local Work
      Reduce Operator Tree:
@@ -2675,15 +2616,12 @@ STAGE PLANS:
           keys: KEY._col0 (type: int)
           mode: mergepartial
          outputColumnNames: _col0, _col1
-          Select Operator
-            expressions: _col0 (type: int), _col1 (type: bigint)
-            outputColumnNames: _col0, _col1
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-1
     Map Reduce
@@ -2704,34 +2642,28 @@ STAGE PLANS:
                   0 key (type: int)
                   1 key (type: int)
                 outputColumnNames: _col0
-                Select Operator
-                  expressions: _col0 (type: int)
-                  outputColumnNames: _col0
-                  Group By Operator
-                    aggregations: count()
-                    keys: _col0 (type: int)
-                    mode: hash
-                    outputColumnNames: _col0, _col1
-                    Reduce Output Operator
-                      key expressions: _col0 (type: int)
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: int)
-                      value expressions: _col1 (type: bigint)
+                Group By Operator
+                  aggregations: count()
+                  keys: _col0 (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: int)
+                    value expressions: _col1 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
           aggregations: count(VALUE._col0)
           keys: KEY._col0 (type: int)
           mode: mergepartial
          outputColumnNames: _col0, _col1
-          Select Operator
-            expressions: _col0 (type: int), _col1 (type: bigint)
-            outputColumnNames: _col0, _col1
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-14
     Conditional Operator
@@ -2773,19 +2705,16 @@ STAGE PLANS:
                   0 key (type: int)
                   1 key (type: int)
                 outputColumnNames: _col0
-                Select Operator
-                  expressions: _col0 (type: int)
-                  outputColumnNames: _col0
-                  Group By Operator
-                    aggregations: count()
-                    keys: _col0 (type: int)
-                    mode: hash
-                    outputColumnNames: _col0, _col1
-                    Reduce Output Operator
-                      key expressions: _col0 (type: int)
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: int)
-                      value expressions: _col1 (type: bigint)
+                Group By Operator
+                  aggregations: count()
+                  keys: _col0 (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: int)
+                    value expressions: _col1 (type: bigint)
       Local Work:
         Map Reduce Local Work
      Reduce Operator Tree:
@@ -2794,15 +2723,12 @@ STAGE PLANS:
           keys: KEY._col0 (type: int)
           mode: mergepartial
          outputColumnNames: _col0, _col1
-          Select Operator
-            expressions: _col0 (type: int), _col1 (type: bigint)
-            outputColumnNames: _col0, _col1
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-20
     Map Reduce Local Work
@@ -2841,19 +2767,16 @@ STAGE PLANS:
                   0 key (type: int)
                   1 key (type: int)
                 outputColumnNames: _col0
-                Select Operator
-                  expressions: _col0 (type: int)
-                  outputColumnNames: _col0
-                  Group By Operator
-                    aggregations: count()
-                    keys: _col0 (type: int)
-                    mode: hash
-                    outputColumnNames: _col0, _col1
-                    Reduce Output Operator
-                      key expressions: _col0 (type: int)
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: int)
-                      value expressions: _col1 (type: bigint)
+                Group By Operator
+                  aggregations: count()
+                  keys: _col0 (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: int)
+                    value expressions: _col1 (type: bigint)
       Local Work:
        Map Reduce Local Work
      Reduce Operator Tree:
@@ -2862,15 +2785,12 @@ STAGE PLANS:
           keys: KEY._col0 (type: int)
           mode: mergepartial
          outputColumnNames: _col0, _col1
-          Select Operator
-            expressions: _col0 (type: int), _col1 (type: bigint)
-            outputColumnNames: _col0, _col1
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-4
     Map Reduce
@@ -2891,34 +2811,28 @@ STAGE PLANS:
                   0 key (type: int)
                   1 key (type: int)
                 outputColumnNames: _col0
-                Select Operator
-                  expressions: _col0 (type: int)
-                  outputColumnNames: _col0
-                  Group By Operator
-                    aggregations: count()
-                    keys: _col0 (type: int)
-                    mode: hash
-                    outputColumnNames: _col0, _col1
-                    Reduce Output Operator
-                      key expressions: _col0 (type: int)
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: int)
-                      value expressions: _col1 (type: bigint)
+                Group By Operator
+                  aggregations: count()
+                  keys: _col0 (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: int)
+                    value expressions: _col1 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
           aggregations: count(VALUE._col0)
           keys: KEY._col0 (type: int)
           mode: mergepartial
          outputColumnNames: _col0, _col1
-          Select Operator
-            expressions: _col0 (type: int), _col1 (type: bigint)
-            outputColumnNames: _col0, _col1
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Fetch Operator
@@ -3043,14 +2957,13 @@ STAGE PLANS:
                 keys:
                   0 _col0 (type: int)
                   1 _col0 (type: int)
-                Select Operator
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
-                    Reduce Output Operator
-                      sort order: 
-                      value expressions: _col0 (type: bigint)
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Reduce Output Operator
+                    sort order: 
+                    value expressions: _col0 (type: bigint)
       Local Work:
         Map Reduce Local Work
      Reduce Operator Tree:
@@ -3110,14 +3023,13 @@ STAGE PLANS:
                 keys:
                   0 _col0 (type: int)
                   1 _col0 (type: int)
-                Select Operator
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
-                    Reduce Output Operator
-                      sort order: 
-                      value expressions: _col0 (type: bigint)
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Reduce Output Operator
+                    sort order: 
+                    value expressions: _col0 (type: bigint)
       Local Work:
        Map Reduce Local Work
      Reduce Operator Tree:
@@ -3157,14 +3069,13 @@ STAGE PLANS:
                 keys:
                   0 _col0 (type: int)
                   1 _col0 (type: int)
-                Select Operator
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
-                    Reduce Output Operator
-                      sort order: 
-                      value expressions: _col0 (type: bigint)
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Reduce Output Operator
+                    sort order: 
+                    value expressions: _col0 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
          aggregations: count(VALUE._col0)
@@ -3285,14 +3196,13 @@ STAGE PLANS:
                 keys:
                   0 _col0 (type: int)
                   1 key (type: int)
-                Select Operator
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
-                    Reduce Output Operator
-                      sort order: 
-                      value expressions: _col0 (type: bigint)
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Reduce Output Operator
+                    sort order: 
+                    value expressions: _col0 (type: bigint)
       Local Work:
         Map Reduce Local Work
      Reduce Operator Tree:
@@ -3349,14 +3259,13 @@ STAGE PLANS:
                 keys:
                   0 _col0 (type: int)
                   1 key (type: int)
-                Select Operator
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -3396,14 +3305,13 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 key (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -3551,14 +3459,13 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -3618,14 +3525,13 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -3665,14 +3571,13 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -3810,14 +3715,13 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -3877,14 +3781,13 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -3924,14 +3827,13 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + 
aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -4038,14 +3940,13 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 key (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -4102,14 +4003,13 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 key (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -4149,14 +4049,13 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 key (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -4261,14 +4160,13 @@ STAGE PLANS: keys: 0 key (type: int) 1 _col0 (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -4325,14 +4223,13 @@ STAGE PLANS: keys: 0 key (type: int) 1 _col0 (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -4368,14 +4265,13 @@ STAGE PLANS: keys: 0 key (type: int) 1 _col0 (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -4520,14 +4416,13 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) 2 _col0 (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + 
sort order: + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -4612,14 +4507,13 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) 2 _col0 (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -4704,14 +4598,13 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) 2 _col0 (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -4754,14 +4647,13 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) 2 _col0 (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -4890,14 +4782,13 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 key (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -4954,14 +4845,13 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 key (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -5001,14 +4891,13 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 key (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/bucket_map_join_1.q.out b/ql/src/test/results/clientpositive/bucket_map_join_1.q.out index b194a2b..36633a2 100644 --- a/ql/src/test/results/clientpositive/bucket_map_join_1.q.out +++ b/ql/src/test/results/clientpositive/bucket_map_join_1.q.out @@ -156,19 +156,17 @@ STAGE PLANS: 1 key (type: string), value (type: string) Position of Big Table: 0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE 
Column stats: NONE - Select Operator - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: diff --git a/ql/src/test/results/clientpositive/bucket_map_join_2.q.out b/ql/src/test/results/clientpositive/bucket_map_join_2.q.out index 07c8878..b224ea1 100644 --- a/ql/src/test/results/clientpositive/bucket_map_join_2.q.out +++ b/ql/src/test/results/clientpositive/bucket_map_join_2.q.out @@ -156,19 +156,17 @@ STAGE PLANS: 1 key (type: string), value (type: string) Position of Big Table: 0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: diff --git a/ql/src/test/results/clientpositive/bucketcontext_1.q.out b/ql/src/test/results/clientpositive/bucketcontext_1.q.out index 0ea304d..c29f626 100644 --- a/ql/src/test/results/clientpositive/bucketcontext_1.q.out +++ b/ql/src/test/results/clientpositive/bucketcontext_1.q.out @@ -248,19 +248,17 @@ STAGE PLANS: Position of Big Table: 1 Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true - Select Operator - Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -488,16 +486,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 1 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value 
expressions: _col0 (type: bigint) - auto parallelism: false + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: diff --git a/ql/src/test/results/clientpositive/bucketcontext_2.q.out b/ql/src/test/results/clientpositive/bucketcontext_2.q.out index e961f06..4cd3520 100644 --- a/ql/src/test/results/clientpositive/bucketcontext_2.q.out +++ b/ql/src/test/results/clientpositive/bucketcontext_2.q.out @@ -232,19 +232,17 @@ STAGE PLANS: Position of Big Table: 1 Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true - Select Operator - Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -472,16 +470,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 1 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: diff --git a/ql/src/test/results/clientpositive/bucketcontext_3.q.out b/ql/src/test/results/clientpositive/bucketcontext_3.q.out index 1de6211..56fd641 100644 --- a/ql/src/test/results/clientpositive/bucketcontext_3.q.out +++ b/ql/src/test/results/clientpositive/bucketcontext_3.q.out @@ -278,19 +278,17 @@ STAGE PLANS: Position of Big Table: 1 Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true - Select Operator - Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -469,16 +467,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 1 - Select Operator - Group By Operator - aggregations: 
count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: diff --git a/ql/src/test/results/clientpositive/bucketcontext_4.q.out b/ql/src/test/results/clientpositive/bucketcontext_4.q.out index f0e369d..a5709ca 100644 --- a/ql/src/test/results/clientpositive/bucketcontext_4.q.out +++ b/ql/src/test/results/clientpositive/bucketcontext_4.q.out @@ -294,19 +294,17 @@ STAGE PLANS: Position of Big Table: 1 Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true - Select Operator - Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -485,16 +483,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 1 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: diff --git a/ql/src/test/results/clientpositive/bucketcontext_5.q.out b/ql/src/test/results/clientpositive/bucketcontext_5.q.out index feb833d..c7d04c9 100644 --- a/ql/src/test/results/clientpositive/bucketcontext_5.q.out +++ b/ql/src/test/results/clientpositive/bucketcontext_5.q.out @@ -166,19 +166,17 @@ STAGE PLANS: Position of Big Table: 1 Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true - Select Operator - Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -347,16 +345,15 @@ STAGE PLANS: 0 key (type: 
string) 1 key (type: string) Position of Big Table: 1 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: diff --git a/ql/src/test/results/clientpositive/bucketcontext_6.q.out b/ql/src/test/results/clientpositive/bucketcontext_6.q.out index 209be0a..d899b3f 100644 --- a/ql/src/test/results/clientpositive/bucketcontext_6.q.out +++ b/ql/src/test/results/clientpositive/bucketcontext_6.q.out @@ -184,19 +184,17 @@ STAGE PLANS: Position of Big Table: 1 Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true - Select Operator - Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -422,16 +420,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 1 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: diff --git a/ql/src/test/results/clientpositive/bucketcontext_7.q.out b/ql/src/test/results/clientpositive/bucketcontext_7.q.out index 11054d6..368267f 100644 --- a/ql/src/test/results/clientpositive/bucketcontext_7.q.out +++ b/ql/src/test/results/clientpositive/bucketcontext_7.q.out @@ -311,19 +311,17 @@ STAGE PLANS: Position of Big Table: 1 Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true - Select Operator - Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col0 (type: bigint) + auto 
parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -553,16 +551,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 1 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: diff --git a/ql/src/test/results/clientpositive/bucketcontext_8.q.out b/ql/src/test/results/clientpositive/bucketcontext_8.q.out index e992702..a9cf3ac 100644 --- a/ql/src/test/results/clientpositive/bucketcontext_8.q.out +++ b/ql/src/test/results/clientpositive/bucketcontext_8.q.out @@ -311,19 +311,17 @@ STAGE PLANS: Position of Big Table: 1 Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true - Select Operator - Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -553,16 +551,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 1 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: diff --git a/ql/src/test/results/clientpositive/bucketmapjoin1.q.out b/ql/src/test/results/clientpositive/bucketmapjoin1.q.out index 9a76f94..12a9705 100644 --- a/ql/src/test/results/clientpositive/bucketmapjoin1.q.out +++ b/ql/src/test/results/clientpositive/bucketmapjoin1.q.out @@ -440,13 +440,13 @@ TOK_QUERY STAGE DEPENDENCIES: Stage-9 is a root stage Stage-1 depends on stages: Stage-9 - Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-9 @@ -631,10 +631,10 @@ STAGE PLANS: Truncated Path -> Alias: /srcbucket_mapjoin [a] - Stage: Stage-8 + Stage: Stage-7 Conditional Operator - Stage: Stage-5 + Stage: Stage-4 Move 
Operator files: hdfs directory: true @@ -662,11 +662,11 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucketmapjoin_tmp_result - Stage: Stage-3 + Stage: Stage-2 Stats-Aggr Operator #### A masked pattern was here #### - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -735,7 +735,7 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### - Stage: Stage-6 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -804,7 +804,7 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### - Stage: Stage-7 + Stage: Stage-6 Move Operator files: hdfs directory: true @@ -983,13 +983,13 @@ TOK_QUERY STAGE DEPENDENCIES: Stage-9 is a root stage Stage-1 depends on stages: Stage-9 - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 + Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 Stage-5 - Stage-6 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-3 depends on stages: Stage-0 + Stage-4 + Stage-6 + Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-9 @@ -1138,10 +1138,10 @@ STAGE PLANS: Truncated Path -> Alias: /srcbucket_mapjoin_part/ds=2008-04-08 [b] - Stage: Stage-7 + Stage: Stage-8 Conditional Operator - Stage: Stage-4 + Stage: Stage-5 Move Operator files: hdfs directory: true @@ -1174,11 +1174,11 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucketmapjoin_tmp_result - Stage: Stage-2 + Stage: Stage-3 Stats-Aggr Operator #### A masked pattern was here #### - Stage: Stage-3 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -1262,7 +1262,7 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -1346,7 +1346,7 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### - Stage: Stage-6 + Stage: Stage-7 Move Operator files: hdfs directory: true diff --git a/ql/src/test/results/clientpositive/bucketmapjoin10.q.out b/ql/src/test/results/clientpositive/bucketmapjoin10.q.out index c77f417..22ef1e0 100644 --- a/ql/src/test/results/clientpositive/bucketmapjoin10.q.out +++ b/ql/src/test/results/clientpositive/bucketmapjoin10.q.out @@ -335,19 +335,17 @@ STAGE PLANS: 1 key (type: int) Position of Big Table: 0 Statistics: Num rows: 955 Data size: 3824 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 955 Data size: 3824 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: diff --git a/ql/src/test/results/clientpositive/bucketmapjoin11.q.out b/ql/src/test/results/clientpositive/bucketmapjoin11.q.out index c85ba92..15842c5 100644 --- 
a/ql/src/test/results/clientpositive/bucketmapjoin11.q.out +++ b/ql/src/test/results/clientpositive/bucketmapjoin11.q.out @@ -353,19 +353,17 @@ STAGE PLANS: Position of Big Table: 0 Statistics: Num rows: 1177 Data size: 4709 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true - Select Operator - Statistics: Num rows: 1177 Data size: 4709 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -753,19 +751,17 @@ STAGE PLANS: Position of Big Table: 0 Statistics: Num rows: 1177 Data size: 4709 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true - Select Operator - Statistics: Num rows: 1177 Data size: 4709 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: diff --git a/ql/src/test/results/clientpositive/bucketmapjoin12.q.out b/ql/src/test/results/clientpositive/bucketmapjoin12.q.out index 9e8c327..487fb4c 100644 --- a/ql/src/test/results/clientpositive/bucketmapjoin12.q.out +++ b/ql/src/test/results/clientpositive/bucketmapjoin12.q.out @@ -266,19 +266,17 @@ STAGE PLANS: Position of Big Table: 0 Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true - Select Operator - Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -554,19 +552,17 @@ STAGE PLANS: 1 key (type: int) Position of Big Table: 0 Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE - Group By 
Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: diff --git a/ql/src/test/results/clientpositive/bucketmapjoin13.q.out b/ql/src/test/results/clientpositive/bucketmapjoin13.q.out index 760a2b8..695d12c 100644 --- a/ql/src/test/results/clientpositive/bucketmapjoin13.q.out +++ b/ql/src/test/results/clientpositive/bucketmapjoin13.q.out @@ -218,19 +218,17 @@ STAGE PLANS: 1 key (type: int) Position of Big Table: 0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -558,19 +556,17 @@ STAGE PLANS: Position of Big Table: 0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true - Select Operator - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -849,19 +845,17 @@ STAGE PLANS: Position of Big Table: 0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true - Select Operator - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: 
Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -1142,19 +1136,17 @@ STAGE PLANS: Position of Big Table: 0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true - Select Operator - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: diff --git a/ql/src/test/results/clientpositive/bucketmapjoin2.q.out b/ql/src/test/results/clientpositive/bucketmapjoin2.q.out index 3936d40..8323945 100644 --- a/ql/src/test/results/clientpositive/bucketmapjoin2.q.out +++ b/ql/src/test/results/clientpositive/bucketmapjoin2.q.out @@ -160,13 +160,13 @@ TOK_QUERY STAGE DEPENDENCIES: Stage-9 is a root stage Stage-1 depends on stages: Stage-9 - Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-9 @@ -356,10 +356,10 @@ STAGE PLANS: Truncated Path -> Alias: /srcbucket_mapjoin_part/ds=2008-04-08 [a] - Stage: Stage-8 + Stage: Stage-7 Conditional Operator - Stage: Stage-5 + Stage: Stage-4 Move Operator files: hdfs directory: true @@ -387,11 +387,11 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucketmapjoin_tmp_result - Stage: Stage-3 + Stage: Stage-2 Stats-Aggr Operator #### A masked pattern was here #### - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -460,7 +460,7 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### - Stage: Stage-6 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -529,7 +529,7 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### - Stage: Stage-7 + Stage: Stage-6 Move Operator files: hdfs directory: true @@ -712,13 +712,13 @@ TOK_QUERY STAGE DEPENDENCIES: Stage-9 is a root stage Stage-1 depends on stages: Stage-9 - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 + Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 Stage-5 - Stage-6 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-3 depends on stages: Stage-0 + Stage-4 + Stage-6 + Stage-7 depends on stages: Stage-6 STAGE 
PLANS: Stage: Stage-9 @@ -913,10 +913,10 @@ STAGE PLANS: Truncated Path -> Alias: /srcbucket_mapjoin_part_2/ds=2008-04-08 [b] - Stage: Stage-7 + Stage: Stage-8 Conditional Operator - Stage: Stage-4 + Stage: Stage-5 Move Operator files: hdfs directory: true @@ -949,11 +949,11 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucketmapjoin_tmp_result - Stage: Stage-2 + Stage: Stage-3 Stats-Aggr Operator #### A masked pattern was here #### - Stage: Stage-3 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -1037,7 +1037,7 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -1121,7 +1121,7 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### - Stage: Stage-6 + Stage: Stage-7 Move Operator files: hdfs directory: true @@ -1316,13 +1316,13 @@ TOK_QUERY STAGE DEPENDENCIES: Stage-9 is a root stage Stage-1 depends on stages: Stage-9 - Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-9 @@ -1562,10 +1562,10 @@ STAGE PLANS: Truncated Path -> Alias: /srcbucket_mapjoin_part/ds=2008-04-08 [a] - Stage: Stage-8 + Stage: Stage-7 Conditional Operator - Stage: Stage-5 + Stage: Stage-4 Move Operator files: hdfs directory: true @@ -1598,11 +1598,11 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucketmapjoin_tmp_result - Stage: Stage-3 + Stage: Stage-2 Stats-Aggr Operator #### A masked pattern was here #### - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -1686,7 +1686,7 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### - Stage: Stage-6 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -1770,7 +1770,7 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### - Stage: Stage-7 + Stage: Stage-6 Move Operator files: hdfs directory: true diff --git a/ql/src/test/results/clientpositive/bucketmapjoin3.q.out b/ql/src/test/results/clientpositive/bucketmapjoin3.q.out index b9c219b..84a91f8 100644 --- a/ql/src/test/results/clientpositive/bucketmapjoin3.q.out +++ b/ql/src/test/results/clientpositive/bucketmapjoin3.q.out @@ -191,13 +191,13 @@ TOK_QUERY STAGE DEPENDENCIES: Stage-9 is a root stage Stage-1 depends on stages: Stage-9 - Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-9 @@ -387,10 +387,10 @@ STAGE PLANS: Truncated Path -> Alias: /srcbucket_mapjoin_part_2/ds=2008-04-08 [a] - Stage: Stage-8 + Stage: Stage-7 Conditional Operator - Stage: Stage-5 + Stage: Stage-4 Move Operator files: hdfs directory: true @@ -418,11 +418,11 @@ STAGE PLANS: serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucketmapjoin_tmp_result - Stage: Stage-3 + Stage: Stage-2 Stats-Aggr Operator #### A masked pattern was here #### - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -491,7 +491,7 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### - Stage: Stage-6 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -560,7 +560,7 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### - Stage: Stage-7 + Stage: Stage-6 Move Operator files: hdfs directory: true @@ -750,13 +750,13 @@ TOK_QUERY STAGE DEPENDENCIES: Stage-9 is a root stage Stage-1 depends on stages: Stage-9 - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 + Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 Stage-5 - Stage-6 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-3 depends on stages: Stage-0 + Stage-4 + Stage-6 + Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-9 @@ -951,10 +951,10 @@ STAGE PLANS: Truncated Path -> Alias: /srcbucket_mapjoin_part/ds=2008-04-08 [b] - Stage: Stage-7 + Stage: Stage-8 Conditional Operator - Stage: Stage-4 + Stage: Stage-5 Move Operator files: hdfs directory: true @@ -987,11 +987,11 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucketmapjoin_tmp_result - Stage: Stage-2 + Stage: Stage-3 Stats-Aggr Operator #### A masked pattern was here #### - Stage: Stage-3 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -1075,7 +1075,7 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -1159,7 +1159,7 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### - Stage: Stage-6 + Stage: Stage-7 Move Operator files: hdfs directory: true diff --git a/ql/src/test/results/clientpositive/bucketmapjoin4.q.out b/ql/src/test/results/clientpositive/bucketmapjoin4.q.out index 2af66a2..d5c6ce0 100644 --- a/ql/src/test/results/clientpositive/bucketmapjoin4.q.out +++ b/ql/src/test/results/clientpositive/bucketmapjoin4.q.out @@ -177,13 +177,13 @@ TOK_QUERY STAGE DEPENDENCIES: Stage-9 is a root stage Stage-1 depends on stages: Stage-9 - Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-9 @@ -322,10 +322,10 @@ STAGE PLANS: Truncated Path -> Alias: /srcbucket_mapjoin [a] - Stage: Stage-8 + Stage: Stage-7 Conditional Operator - Stage: Stage-5 + Stage: Stage-4 Move Operator files: hdfs directory: true @@ -353,11 +353,11 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucketmapjoin_tmp_result - Stage: Stage-3 + Stage: Stage-2 Stats-Aggr Operator #### A masked pattern was here #### - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -426,7 +426,7 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### - Stage: 
Stage-6 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -495,7 +495,7 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### - Stage: Stage-7 + Stage: Stage-6 Move Operator files: hdfs directory: true @@ -659,13 +659,13 @@ TOK_QUERY STAGE DEPENDENCIES: Stage-9 is a root stage Stage-1 depends on stages: Stage-9 - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 + Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 Stage-5 - Stage-6 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-3 depends on stages: Stage-0 + Stage-4 + Stage-6 + Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-9 @@ -809,10 +809,10 @@ STAGE PLANS: Truncated Path -> Alias: /srcbucket_mapjoin [b] - Stage: Stage-7 + Stage: Stage-8 Conditional Operator - Stage: Stage-4 + Stage: Stage-5 Move Operator files: hdfs directory: true @@ -845,11 +845,11 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucketmapjoin_tmp_result - Stage: Stage-2 + Stage: Stage-3 Stats-Aggr Operator #### A masked pattern was here #### - Stage: Stage-3 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -933,7 +933,7 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -1017,7 +1017,7 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### - Stage: Stage-6 + Stage: Stage-7 Move Operator files: hdfs directory: true diff --git a/ql/src/test/results/clientpositive/bucketmapjoin5.q.out b/ql/src/test/results/clientpositive/bucketmapjoin5.q.out index 0347457..68e9de3 100644 --- a/ql/src/test/results/clientpositive/bucketmapjoin5.q.out +++ b/ql/src/test/results/clientpositive/bucketmapjoin5.q.out @@ -227,13 +227,13 @@ TOK_QUERY STAGE DEPENDENCIES: Stage-9 is a root stage Stage-1 depends on stages: Stage-9 - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 + Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 Stage-5 - Stage-6 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-3 depends on stages: Stage-0 + Stage-4 + Stage-6 + Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-9 @@ -425,10 +425,10 @@ STAGE PLANS: /srcbucket_mapjoin_part/ds=2008-04-08 [b] /srcbucket_mapjoin_part/ds=2008-04-09 [b] - Stage: Stage-7 + Stage: Stage-8 Conditional Operator - Stage: Stage-4 + Stage: Stage-5 Move Operator files: hdfs directory: true @@ -456,11 +456,11 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucketmapjoin_tmp_result - Stage: Stage-2 + Stage: Stage-3 Stats-Aggr Operator #### A masked pattern was here #### - Stage: Stage-3 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -529,7 +529,7 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -598,7 +598,7 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### - Stage: Stage-6 + Stage: Stage-7 Move Operator files: hdfs directory: true @@ -774,13 +774,13 @@ TOK_QUERY STAGE DEPENDENCIES: Stage-9 is a root stage Stage-1 depends on stages: 
Stage-9 - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 + Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 Stage-5 - Stage-6 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-3 depends on stages: Stage-0 + Stage-4 + Stage-6 + Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-9 @@ -977,10 +977,10 @@ STAGE PLANS: /srcbucket_mapjoin_part_2/ds=2008-04-08 [b] /srcbucket_mapjoin_part_2/ds=2008-04-09 [b] - Stage: Stage-7 + Stage: Stage-8 Conditional Operator - Stage: Stage-4 + Stage: Stage-5 Move Operator files: hdfs directory: true @@ -1013,11 +1013,11 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucketmapjoin_tmp_result - Stage: Stage-2 + Stage: Stage-3 Stats-Aggr Operator #### A masked pattern was here #### - Stage: Stage-3 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -1101,7 +1101,7 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -1185,7 +1185,7 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### - Stage: Stage-6 + Stage: Stage-7 Move Operator files: hdfs directory: true diff --git a/ql/src/test/results/clientpositive/bucketmapjoin8.q.out b/ql/src/test/results/clientpositive/bucketmapjoin8.q.out index e573f59..4a5d0ae 100644 --- a/ql/src/test/results/clientpositive/bucketmapjoin8.q.out +++ b/ql/src/test/results/clientpositive/bucketmapjoin8.q.out @@ -232,19 +232,17 @@ STAGE PLANS: Position of Big Table: 0 Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true - Select Operator - Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -537,19 +535,17 @@ STAGE PLANS: Position of Big Table: 0 Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true - Select Operator - Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce 
Local Work Path -> Alias: diff --git a/ql/src/test/results/clientpositive/bucketmapjoin9.q.out b/ql/src/test/results/clientpositive/bucketmapjoin9.q.out index c6f94f5..48da33c 100644 --- a/ql/src/test/results/clientpositive/bucketmapjoin9.q.out +++ b/ql/src/test/results/clientpositive/bucketmapjoin9.q.out @@ -232,19 +232,17 @@ STAGE PLANS: 1 key (type: int) Position of Big Table: 0 Statistics: Num rows: 577 Data size: 2310 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 577 Data size: 2310 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -562,19 +560,17 @@ STAGE PLANS: 1 key (type: int) Position of Big Table: 0 Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: diff --git a/ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out b/ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out index 258a962..dff48a3 100644 --- a/ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out +++ b/ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out @@ -135,13 +135,13 @@ TOK_QUERY STAGE DEPENDENCIES: Stage-9 is a root stage Stage-1 depends on stages: Stage-9 - Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-9 @@ -318,10 +318,10 @@ STAGE PLANS: Truncated Path -> Alias: /srcbucket_mapjoin [a] - Stage: Stage-8 + Stage: Stage-7 Conditional Operator - Stage: Stage-5 + Stage: Stage-4 Move Operator files: hdfs directory: true @@ -349,11 +349,11 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucketmapjoin_tmp_result - Stage: Stage-3 + Stage: Stage-2 Stats-Aggr Operator #### A masked pattern was 
here #### - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -422,7 +422,7 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### - Stage: Stage-6 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -491,7 +491,7 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### - Stage: Stage-7 + Stage: Stage-6 Move Operator files: hdfs directory: true diff --git a/ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out b/ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out index d99f5b4..1348f68 100644 --- a/ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out +++ b/ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out @@ -137,13 +137,13 @@ TOK_QUERY STAGE DEPENDENCIES: Stage-9 is a root stage Stage-1 depends on stages: Stage-9 - Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-9 @@ -373,10 +373,10 @@ STAGE PLANS: Truncated Path -> Alias: /srcbucket_mapjoin [a] - Stage: Stage-8 + Stage: Stage-7 Conditional Operator - Stage: Stage-5 + Stage: Stage-4 Move Operator files: hdfs directory: true @@ -404,11 +404,11 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucketmapjoin_tmp_result - Stage: Stage-3 + Stage: Stage-2 Stats-Aggr Operator #### A masked pattern was here #### - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -477,7 +477,7 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### - Stage: Stage-6 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -546,7 +546,7 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### - Stage: Stage-7 + Stage: Stage-6 Move Operator files: hdfs directory: true diff --git a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_2.q.out b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_2.q.out index f6a1c5c..69d3843 100644 --- a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_2.q.out +++ b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_2.q.out @@ -97,7 +97,7 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 @@ -142,7 +142,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 - Stage: Stage-3 + Stage: Stage-2 Stats-Aggr Operator PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') @@ -230,7 +230,7 @@ STAGE DEPENDENCIES: Stage-7 has a backup stage: Stage-1 Stage-4 depends on stages: Stage-7 Stage-0 depends on stages: Stage-1, Stage-4, Stage-5 - Stage-3 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0 Stage-8 has a backup stage: Stage-1 Stage-5 depends on stages: Stage-8 Stage-1 @@ -308,7 +308,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 - Stage: Stage-3 + Stage: Stage-2 Stats-Aggr Operator Stage: Stage-8 @@ -512,7 +512,7 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: 
Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 @@ -557,7 +557,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 - Stage: Stage-3 + Stage: Stage-2 Stats-Aggr Operator PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') diff --git a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_4.q.out b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_4.q.out index 950cd98..2557733 100644 --- a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_4.q.out +++ b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_4.q.out @@ -73,7 +73,7 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 @@ -118,7 +118,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 - Stage: Stage-3 + Stage: Stage-2 Stats-Aggr Operator PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') @@ -225,7 +225,7 @@ STAGE DEPENDENCIES: Stage-7 has a backup stage: Stage-1 Stage-4 depends on stages: Stage-7 Stage-0 depends on stages: Stage-1, Stage-4, Stage-5 - Stage-3 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0 Stage-8 has a backup stage: Stage-1 Stage-5 depends on stages: Stage-8 Stage-1 @@ -271,14 +271,11 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - value expressions: _col0 (type: int), _col1 (type: string) + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + value expressions: _col0 (type: int), _col1 (type: string) Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -303,7 +300,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 - Stage: Stage-3 + Stage: Stage-2 Stats-Aggr Operator Stage: Stage-8 @@ -343,14 +340,11 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - value expressions: _col0 (type: int), _col1 (type: string) + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + value expressions: _col0 (type: int), _col1 (type: string) Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -382,14 +376,11 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - value expressions: _col0 (type: int), _col1 (type: string) + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) 
+ value expressions: _col0 (type: int), _col1 (type: string) Reduce Operator Tree: Extract File Output Operator diff --git a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_5.q.out b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_5.q.out index 81ad83f..fc903c3 100644 --- a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_5.q.out +++ b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_5.q.out @@ -75,7 +75,7 @@ STAGE DEPENDENCIES: Stage-7 has a backup stage: Stage-1 Stage-4 depends on stages: Stage-7 Stage-0 depends on stages: Stage-1, Stage-4, Stage-5 - Stage-3 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0 Stage-8 has a backup stage: Stage-1 Stage-5 depends on stages: Stage-8 Stage-1 @@ -153,7 +153,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 - Stage: Stage-3 + Stage: Stage-2 Stats-Aggr Operator Stage: Stage-8 diff --git a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_6.q.out b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_6.q.out index e6976db..d885b80 100644 --- a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_6.q.out +++ b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_6.q.out @@ -75,7 +75,7 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 @@ -120,7 +120,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 - Stage: Stage-3 + Stage: Stage-2 Stats-Aggr Operator PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') @@ -353,7 +353,7 @@ STAGE DEPENDENCIES: Stage-7 has a backup stage: Stage-1 Stage-4 depends on stages: Stage-7 Stage-0 depends on stages: Stage-1, Stage-4, Stage-5 - Stage-3 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0 Stage-8 has a backup stage: Stage-1 Stage-5 depends on stages: Stage-8 Stage-1 @@ -431,7 +431,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 - Stage: Stage-3 + Stage: Stage-2 Stats-Aggr Operator Stage: Stage-8 @@ -761,7 +761,7 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 @@ -806,7 +806,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 - Stage: Stage-3 + Stage: Stage-2 Stats-Aggr Operator PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') @@ -917,7 +917,7 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 @@ -962,7 +962,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 - Stage: Stage-3 + Stage: Stage-2 Stats-Aggr Operator PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') @@ -1085,7 +1085,7 @@ STAGE DEPENDENCIES: Stage-7 has a backup stage: Stage-1 Stage-4 depends on stages: Stage-7 Stage-0 depends on stages: Stage-1, Stage-4, Stage-5 - Stage-3 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0 Stage-8 has a backup stage: Stage-1 Stage-5 depends on stages: Stage-8 Stage-1 @@ -1163,7 +1163,7 @@ STAGE PLANS: serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table4 - Stage: Stage-3 + Stage: Stage-2 Stats-Aggr Operator Stage: Stage-8 diff --git a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_7.q.out b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_7.q.out index 99c157d..6be4e57 100644 --- a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_7.q.out +++ b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_7.q.out @@ -75,7 +75,7 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 @@ -120,7 +120,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 - Stage: Stage-3 + Stage: Stage-2 Stats-Aggr Operator PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') diff --git a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_8.q.out b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_8.q.out index e28acfd..f4d2f09 100644 --- a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_8.q.out +++ b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_8.q.out @@ -73,7 +73,7 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 @@ -118,7 +118,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 - Stage: Stage-3 + Stage: Stage-2 Stats-Aggr Operator PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') @@ -205,7 +205,7 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 @@ -250,7 +250,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 - Stage: Stage-3 + Stage: Stage-2 Stats-Aggr Operator PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') diff --git a/ql/src/test/results/clientpositive/cbo_union.q.out b/ql/src/test/results/clientpositive/cbo_union.q.out index 4084a15..eb02b03 100644 --- a/ql/src/test/results/clientpositive/cbo_union.q.out +++ b/ql/src/test/results/clientpositive/cbo_union.q.out @@ -25,13 +25,13 @@ POSTHOOK: Input: default@cbo_t2@dt=2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 +1 1 1 1.0 true 2014 +1 1 1 1.0 true 2014 +1 1 1 1.0 true 2014 +1 1 1 1.0 true 2014 +1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 -2 2 2 2.0 true 2014 -2 2 2 2.0 true 2014 -2 2 2 2.0 true 2014 -2 2 2 2.0 true 2014 -2 2 2 2.0 true 2014 null null NULL NULL NULL 2014 null null NULL NULL NULL 2014 1 1 1 1.0 true 2014 @@ -45,13 +45,13 @@ null null NULL NULL NULL 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 -1 1 1 1.0 true 2014 -1 1 1 1.0 true 2014 -1 1 1 1.0 true 2014 -1 1 1 1.0 true 2014 -1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 +2 2 2 2.0 true 2014 +2 2 2 2.0 true 2014 +2 2 2 2.0 true 2014 +2 2 2 2.0 true 2014 +2 2 2 2.0 true 2014 null null NULL NULL NULL 2014 null null NULL NULL NULL 2014 PREHOOK: query: select key from (select key, c_int from (select * from cbo_t1 union all select * from cbo_t2 where cbo_t2.key >=0)r1 union all select key, c_int from cbo_t3)r2 where key >=0 order by key diff --git 
a/ql/src/test/results/clientpositive/cluster.q.out b/ql/src/test/results/clientpositive/cluster.q.out index f5573c9..15316f8 100644 --- a/ql/src/test/results/clientpositive/cluster.q.out +++ b/ql/src/test/results/clientpositive/cluster.q.out @@ -470,7 +470,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: y + alias: x Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key = 20) (type: boolean) @@ -479,8 +479,9 @@ STAGE PLANS: key expressions: '20' (type: string) sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) TableScan - alias: x + alias: y Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key = 20) (type: boolean) @@ -489,7 +490,6 @@ STAGE PLANS: key expressions: '20' (type: string) sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -499,16 +499,12 @@ STAGE PLANS: 1 outputColumnNames: _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -563,7 +559,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: y + alias: x Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key = 20) (type: boolean) @@ -574,7 +570,7 @@ STAGE PLANS: Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) TableScan - alias: x + alias: y Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key = 20) (type: boolean) @@ -658,7 +654,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: y + alias: x Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key = 20) (type: boolean) @@ -669,7 +665,7 @@ STAGE PLANS: Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) TableScan - alias: x + alias: y Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key = 20) (type: boolean) @@ -752,7 +748,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: y + alias: x Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key = 20) (type: boolean) @@ -761,8 +757,9 @@ STAGE PLANS: key expressions: '20' (type: string) sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) TableScan - alias: x + alias: y 
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key = 20) (type: boolean) @@ -771,7 +768,6 @@ STAGE PLANS: key expressions: '20' (type: string) sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -867,16 +863,12 @@ STAGE PLANS: Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Union Statistics: Num rows: 332 Data size: 3526 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 332 Data size: 3526 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 332 Data size: 3526 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + value expressions: _col1 (type: string) TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -889,16 +881,12 @@ STAGE PLANS: Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Union Statistics: Num rows: 332 Data size: 3526 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 332 Data size: 3526 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 332 Data size: 3526 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + value expressions: _col1 (type: string) Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) diff --git a/ql/src/test/results/clientpositive/column_access_stats.q.out b/ql/src/test/results/clientpositive/column_access_stats.q.out index 103e6e2..b392ef2 100644 --- a/ql/src/test/results/clientpositive/column_access_stats.q.out +++ b/ql/src/test/results/clientpositive/column_access_stats.q.out @@ -386,27 +386,27 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: t2 - Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE + alias: t1 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE TableScan - alias: t1 - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: t2 + Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE Filter Operator 
predicate: key is not null (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -501,27 +501,27 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: t2 - Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE + alias: t1 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((val = 3) and key is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE TableScan - alias: t1 - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: t2 + Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((val = 3) and key is not null) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -583,35 +583,35 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: t2 - Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE + alias: t1 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((key = 6) and val is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + predicate: ((key = 5) and val is not null) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: val (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE TableScan - alias: t1 - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: t2 + Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key = 5) and val is not null) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + predicate: ((key = 6) and val is not null) (type: boolean) + Statistics: Num rows: 1 
Data size: 3 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: val (type: string) outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -679,44 +679,44 @@ JOIN T3 ON T3.key = T4.key PREHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - alias: t2 - Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE + alias: t1 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE TableScan - alias: t1 - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: t2 + Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -726,21 +726,23 @@ STAGE PLANS: 1 outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + 
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + TableScan alias: t3 Statistics: Num rows: 5 Data size: 30 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -752,12 +754,6 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE value expressions: val (type: string) - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: diff --git a/ql/src/test/results/clientpositive/combine2.q.out b/ql/src/test/results/clientpositive/combine2.q.out index 921dd90..1fc1f3e 100644 --- a/ql/src/test/results/clientpositive/combine2.q.out +++ b/ql/src/test/results/clientpositive/combine2.q.out @@ -22,7 +22,7 @@ create table combine2(key string) partitioned by (value string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@combine2 -PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) -- This test sets mapred.max.split.size=256 and hive.merge.smallfiles.avgsize=0 -- in an attempt to force the generation of multiple splits and multiple output files. -- However, Hadoop 0.20 is incapable of generating splits smaller than the block size @@ -40,7 +40,7 @@ select * from ( PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@combine2 -POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) -- This test sets mapred.max.split.size=256 and hive.merge.smallfiles.avgsize=0 -- in an attempt to force the generation of multiple splits and multiple output files. 
-- However, Hadoop 0.20 is incapable of generating splits smaller than the block size diff --git a/ql/src/test/results/clientpositive/complex_alias.q.out b/ql/src/test/results/clientpositive/complex_alias.q.out index d8264bd..4060fa1 100644 --- a/ql/src/test/results/clientpositive/complex_alias.q.out +++ b/ql/src/test/results/clientpositive/complex_alias.q.out @@ -64,8 +64,8 @@ FROM (SELECT Sum(agg1.col2) AS a1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-4 - Stage-4 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage Stage-0 depends on stages: Stage-2 STAGE PLANS: @@ -75,6 +75,77 @@ STAGE PLANS: TableScan alias: agg1 Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: col0 (type: int), col2 (type: double) + outputColumnNames: col0, col2 + Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(col2) + keys: col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 {VALUE._col1} {VALUE._col2} + outputColumnNames: _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: agg1 + Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (col0 = col0) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE @@ -141,77 +212,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat 
serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string) - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 {VALUE._col1} {VALUE._col2} - outputColumnNames: _col2, _col3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-4 - Map Reduce - Map Operator Tree: - TableScan - alias: agg1 - Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: col0 (type: int), col2 (type: double) - outputColumnNames: col0, col2 - Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(col2) - keys: col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-0 Fetch Operator limit: -1 diff --git a/ql/src/test/results/clientpositive/constprog2.q.out b/ql/src/test/results/clientpositive/constprog2.q.out index 71a44f3..4c8b7c8 100644 --- a/ql/src/test/results/clientpositive/constprog2.q.out +++ b/ql/src/test/results/clientpositive/constprog2.q.out @@ -15,7 +15,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key = 86) (type: boolean) @@ -24,9 +24,8 @@ STAGE PLANS: key expressions: '86' (type: string) sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) TableScan - alias: src1 + alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key = 86) (type: boolean) @@ -35,6 +34,7 @@ STAGE PLANS: key expressions: '86' 
(type: string) sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -90,7 +90,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (UDFToDouble(key) = 86) (type: boolean) @@ -99,9 +99,8 @@ STAGE PLANS: key expressions: '86' (type: string) sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) TableScan - alias: src1 + alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (UDFToDouble(key) = 86) (type: boolean) @@ -110,6 +109,7 @@ STAGE PLANS: key expressions: '86' (type: string) sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Reduce Operator Tree: Join Operator condition map: diff --git a/ql/src/test/results/clientpositive/correlationoptimizer1.q.out b/ql/src/test/results/clientpositive/correlationoptimizer1.q.out index 53fd47c..1b6d2b2 100644 --- a/ql/src/test/results/clientpositive/correlationoptimizer1.q.out +++ b/ql/src/test/results/clientpositive/correlationoptimizer1.q.out @@ -61,22 +61,18 @@ STAGE PLANS: 1 outputColumnNames: _col0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -95,21 +91,17 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col1)) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col1)) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 1 Data size: 16 Basic stats: 
COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Map Reduce @@ -214,33 +206,25 @@ STAGE PLANS: 1 outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Mux Operator Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Mux Operator + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: complete + aggregations: sum(hash(_col0)), sum(hash(_col1)) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col1)) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -360,22 +344,18 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -385,21 +365,17 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) + Group By Operator + aggregations: sum(hash(_col0)), 
sum(hash(_col1)) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col1)) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Map Reduce @@ -518,22 +494,18 @@ STAGE PLANS: 1 outputColumnNames: _col0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -552,21 +524,17 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col1)) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col1)) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Map Reduce @@ -680,33 +648,25 @@ STAGE PLANS: 1 outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - 
Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Mux Operator Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Mux Operator + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: complete + aggregations: sum(hash(_col0)), sum(hash(_col1)) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col1)) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -810,22 +770,18 @@ STAGE PLANS: 1 outputColumnNames: _col0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -844,21 +800,17 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col1)) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col1)) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Map Reduce @@ -957,33 +909,25 @@ STAGE PLANS: 1 outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Mux Operator Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Mux Operator + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: complete + aggregations: sum(hash(_col0)), sum(hash(_col1)) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col1)) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -1087,22 +1031,18 @@ STAGE PLANS: 1 {KEY.reducesinkkey0} outputColumnNames: _col5 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col5 (type: string) - outputColumnNames: _col5 + Group By Operator + aggregations: count(1) + keys: _col5 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col5 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ 
-1121,21 +1061,17 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col1)) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col1)) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Map Reduce @@ -1233,22 +1169,18 @@ STAGE PLANS: 1 {KEY.reducesinkkey0} outputColumnNames: _col5 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col5 (type: string) - outputColumnNames: _col5 + Group By Operator + aggregations: count(1) + keys: _col5 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col5 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -1267,21 +1199,17 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col1)) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col1)) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Map Reduce @@ -1358,21 +1286,21 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: y - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string), value (type: string) sort order: ++ Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE TableScan - alias: x - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: y + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string), value (type: string) sort order: ++ Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -1382,22 +1310,18 @@ STAGE PLANS: 1 {KEY.reducesinkkey1} outputColumnNames: _col0, _col6 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col6 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string), _col6 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string), _col6 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -1484,21 +1408,21 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: y - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string), value (type: string) sort order: ++ Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE TableScan - alias: x - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: y + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: 
string), value (type: string) sort order: ++ Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -1508,22 +1432,18 @@ STAGE PLANS: 1 {KEY.reducesinkkey1} outputColumnNames: _col0, _col6 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col6 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string), _col6 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string), _col6 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -1643,22 +1563,18 @@ STAGE PLANS: 1 {KEY.reducesinkkey0} outputColumnNames: _col5 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col5 (type: string) - outputColumnNames: _col5 + Group By Operator + aggregations: count(1) + keys: _col5 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col5 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -1677,21 +1593,17 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col1)) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col1)) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Map Reduce @@ -1790,33 +1702,25 @@ STAGE PLANS: 1 {KEY.reducesinkkey0} outputColumnNames: _col5 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col5 (type: string) - outputColumnNames: _col5 + Mux Operator Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Mux Operator + Group By Operator + aggregations: count(1) + keys: _col5 (type: string) + mode: complete + outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator - aggregations: count(1) - keys: _col5 (type: string) - mode: complete + aggregations: sum(hash(_col0)), sum(hash(_col1)) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col1)) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -1920,22 +1824,18 @@ STAGE PLANS: 1 outputColumnNames: _col0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ 
-1954,21 +1854,17 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col1)) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col1)) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Map Reduce @@ -2066,22 +1962,18 @@ STAGE PLANS: 1 outputColumnNames: _col0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -2100,21 +1992,17 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col1)) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col1)) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + 
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Map Reduce @@ -2220,22 +2108,18 @@ STAGE PLANS: 1 outputColumnNames: _col0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -2254,21 +2138,17 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col1)) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col1)) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Map Reduce @@ -2366,22 +2246,18 @@ STAGE PLANS: 1 outputColumnNames: _col0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false 
+ table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -2400,21 +2276,17 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col1)) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col1)) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Map Reduce @@ -2521,22 +2393,18 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -2555,21 +2423,17 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)) + mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false 
- table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Map Reduce @@ -2674,22 +2538,18 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -2708,21 +2568,17 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)) + mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Map Reduce @@ -2828,22 +2684,18 @@ STAGE PLANS: 1 outputColumnNames: _col0 Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 
Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -2862,21 +2714,17 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 68 Data size: 724 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col1)) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 68 Data size: 724 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col1)) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Map Reduce @@ -2980,22 +2828,18 @@ STAGE PLANS: 1 outputColumnNames: _col0 Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -3014,21 +2858,17 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 68 Data size: 724 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) + Group By Operator + 
aggregations: sum(hash(_col0)), sum(hash(_col1)) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 68 Data size: 724 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col1)) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Map Reduce diff --git a/ql/src/test/results/clientpositive/correlationoptimizer10.q.out b/ql/src/test/results/clientpositive/correlationoptimizer10.q.out index 6e7904a..00d0c1e 100644 --- a/ql/src/test/results/clientpositive/correlationoptimizer10.q.out +++ b/ql/src/test/results/clientpositive/correlationoptimizer10.q.out @@ -33,13 +33,13 @@ LEFT SEMI JOIN src yy ON xx.key=yy.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 Stage-3 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-3 - Stage-0 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -73,24 +73,20 @@ STAGE PLANS: 1 outputColumnNames: _col0 Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -106,22 +102,25 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 54 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 54 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + TableScan alias: yy Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -141,13 +140,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 7 Data size: 54 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) Reduce Operator Tree: Join Operator condition map: @@ -232,26 +224,6 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: yy - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: key (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - TableScan alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -273,6 +245,26 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: yy + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Demux Operator Statistics: Num rows: 276 Data size: 2854 Basic stats: COMPLETE Column stats: NONE @@ -284,43 +276,35 @@ STAGE PLANS: 1 outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 
Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Mux Operator Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Mux Operator + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) + Mux Operator + Statistics: Num rows: 276 Data size: 2854 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Mux Operator - Statistics: Num rows: 276 Data size: 2854 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Left Semi Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 - outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Mux Operator Statistics: Num rows: 276 Data size: 2854 Basic stats: COMPLETE Column stats: NONE Join Operator @@ -459,21 +443,17 @@ STAGE PLANS: 1 outputColumnNames: _col0 Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) + Group By Operator + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0 Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -651,31 +631,27 @@ STAGE PLANS: 1 outputColumnNames: _col0 
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Mux Operator - Statistics: Num rows: 123 Data size: 1267 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Left Semi Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 + Mux Operator + Statistics: Num rows: 123 Data size: 1267 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -783,21 +759,17 @@ STAGE PLANS: 1 outputColumnNames: _col0 Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) + Group By Operator + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0 Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -987,31 +959,27 @@ STAGE PLANS: 1 outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Mux Operator - Statistics: Num rows: 360 Data size: 3824 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Left Semi Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 + Mux Operator + Statistics: Num rows: 360 Data size: 3824 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 
{KEY.reducesinkkey0} {VALUE._col0} + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/correlationoptimizer11.q.out b/ql/src/test/results/clientpositive/correlationoptimizer11.q.out index 4819152..d35974c 100644 --- a/ql/src/test/results/clientpositive/correlationoptimizer11.q.out +++ b/ql/src/test/results/clientpositive/correlationoptimizer11.q.out @@ -68,27 +68,27 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: y - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: x + Statistics: Num rows: 100 Data size: 1070 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 535 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 535 Basic stats: COMPLETE Column stats: NONE TableScan - alias: x - Statistics: Num rows: 100 Data size: 1070 Basic stats: COMPLETE Column stats: NONE + alias: y + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 50 Data size: 535 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 50 Data size: 535 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -98,22 +98,18 @@ STAGE PLANS: 1 outputColumnNames: _col0 Statistics: Num rows: 55 Data size: 588 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 55 Data size: 588 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 588 Basic 
stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -196,27 +192,27 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: y - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: x + Statistics: Num rows: 100 Data size: 1070 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 535 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 535 Basic stats: COMPLETE Column stats: NONE TableScan - alias: x - Statistics: Num rows: 100 Data size: 1070 Basic stats: COMPLETE Column stats: NONE + alias: y + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 50 Data size: 535 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 50 Data size: 535 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Demux Operator Statistics: Num rows: 63 Data size: 634 Basic stats: COMPLETE Column stats: NONE @@ -228,29 +224,25 @@ STAGE PLANS: 1 outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Mux Operator Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Mux Operator + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: complete + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -313,7 +305,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: y + alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -324,7 +316,7 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE TableScan - alias: x + alias: y Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -343,22 +335,18 @@ STAGE PLANS: 1 outputColumnNames: _col0 Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -452,7 +440,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: y + alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -463,7 +451,7 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE TableScan - alias: x + alias: y Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -484,29 +472,25 @@ STAGE PLANS: 1 outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Mux Operator Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Mux Operator + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: complete + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output 
Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/correlationoptimizer12.q.out b/ql/src/test/results/clientpositive/correlationoptimizer12.q.out index 0d95e4a..8e01833 100644 --- a/ql/src/test/results/clientpositive/correlationoptimizer12.q.out +++ b/ql/src/test/results/clientpositive/correlationoptimizer12.q.out @@ -25,26 +25,26 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: y - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: x + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string), key (type: string) sort order: ++ Map-reduce partition columns: key (type: string) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: string), value (type: string) Reduce Operator Tree: Extract - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE PTF Operator - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _wcol0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -60,14 +60,14 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) TableScan Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reduce Operator Tree: Join Operator @@ -94,26 +94,26 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: x - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: y + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string), key (type: string) sort order: ++ Map-reduce partition columns: key (type: string) - Statistics: 
Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
value expressions: key (type: string), value (type: string)
Reduce Operator Tree:
Extract
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
PTF Operator
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: _col0 is not null (type: boolean)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), _wcol0 (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
diff --git a/ql/src/test/results/clientpositive/correlationoptimizer13.q.out b/ql/src/test/results/clientpositive/correlationoptimizer13.q.out
index 29001f1..fd75fc9 100644
--- a/ql/src/test/results/clientpositive/correlationoptimizer13.q.out
+++ b/ql/src/test/results/clientpositive/correlationoptimizer13.q.out
@@ -58,26 +58,26 @@ STAGE PLANS:
Map Reduce
Map Operator Tree:
TableScan
- alias: x1
+ alias: x
Statistics: Num rows: 1028 Data size: 22964 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (((c2 > 100) and c1 is not null) and c3 is not null) (type: boolean)
- Statistics: Num rows: 86 Data size: 1921 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((c1 < 120) and c3 is not null) (type: boolean)
+ Statistics: Num rows: 171 Data size: 3819 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: c3 (type: string), c1 (type: int)
outputColumnNames: c3, c1
- Statistics: Num rows: 86 Data size: 1921 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 171 Data size: 3819 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
keys: c3 (type: string), c1 (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 86 Data size: 1921 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 171 Data size: 3819 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: int)
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
- Statistics: Num rows: 86 Data size: 1921 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 171 Data size: 3819 Basic stats: COMPLETE Column stats: NONE
value expressions: _col2 (type: bigint)
Reduce Operator Tree:
Group By Operator
@@ -85,11 +85,11 @@ STAGE PLANS:
keys: KEY._col0 (type: string), KEY._col1 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 43 Data size: 960 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 85 Data size: 1898 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col1 (type: int), _col0 (type: string), _col2 (type: bigint)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 43 Data size: 960 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 85 Data size: 1898 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -105,14 +105,14 @@ STAGE PLANS:
key expressions: _col0 (type: int), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col0 (type: int), _col1 (type: string)
- Statistics: Num rows: 43 Data size: 960 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 85 Data size: 1898 Basic stats: COMPLETE Column stats: NONE
value expressions: _col2 (type: bigint)
TableScan
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col0 (type: int), _col1 (type: string)
- Statistics: Num rows: 85 Data size: 1898 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 43 Data size: 960 Basic stats: COMPLETE Column stats: NONE
value expressions: _col2 (type: bigint)
Reduce Operator Tree:
Join Operator
@@ -159,26 +159,26 @@ STAGE PLANS:
Map Reduce
Map Operator Tree:
TableScan
- alias: x
+ alias: x1
Statistics: Num rows: 1028 Data size: 22964 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((c1 < 120) and c3 is not null) (type: boolean)
- Statistics: Num rows: 171 Data size: 3819 Basic stats: COMPLETE Column stats: NONE
+ predicate: (((c2 > 100) and c1 is not null) and c3 is not null) (type: boolean)
+ Statistics: Num rows: 86 Data size: 1921 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: c3 (type: string), c1 (type: int)
outputColumnNames: c3, c1
- Statistics: Num rows: 171 Data size: 3819 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 86 Data size: 1921 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
keys: c3 (type: string), c1 (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 171 Data size: 3819 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 86 Data size: 1921 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: int)
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
- Statistics: Num rows: 171 Data size: 3819 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 86 Data size: 1921 Basic stats: COMPLETE Column stats: NONE
value expressions: _col2 (type: bigint)
Reduce Operator Tree:
Group By Operator
@@ -186,11 +186,11 @@ STAGE PLANS:
keys: KEY._col0 (type: string), KEY._col1 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 85 Data size: 1898 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 43 Data size: 960 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col1 (type: int), _col0 (type: string), _col2 (type: bigint)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 85 Data size: 1898 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 43 Data size: 960 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
diff --git a/ql/src/test/results/clientpositive/correlationoptimizer14.q.out b/ql/src/test/results/clientpositive/correlationoptimizer14.q.out
index d4fc75e..166ba4b 100644
--- a/ql/src/test/results/clientpositive/correlationoptimizer14.q.out
+++ b/ql/src/test/results/clientpositive/correlationoptimizer14.q.out
@@ -43,25 +43,25 @@ STAGE PLANS:
Map Reduce
Map Operator Tree:
TableScan
- alias: y
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ alias: x
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: string), _col1 (type: string)
Reduce Operator Tree:
Select Operator
expressions: VALUE._col0 (type: string), VALUE._col1 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -77,14 +77,14 @@ STAGE PLANS:
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
TableScan
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
Reduce Operator Tree:
Join Operator
@@ -111,25 +111,25 @@ STAGE PLANS:
Map Reduce
Map Operator Tree:
TableScan
- alias: x
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ alias: y
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: string), _col1 (type: string)
Reduce Operator Tree:
Select Operator
expressions: VALUE._col0 (type: string), VALUE._col1 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -172,25 +172,25 @@ STAGE PLANS:
Map Reduce
Map Operator Tree:
TableScan
- alias: y
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ alias: x
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -206,14 +206,14 @@ STAGE PLANS:
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
TableScan
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
Reduce Operator Tree:
Join Operator
@@ -240,25 +240,25 @@ STAGE PLANS:
Map Reduce
Map Operator Tree:
TableScan
- alias: x
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ alias: y
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -301,26 +301,26 @@ STAGE PLANS:
Map Reduce
Map Operator Tree:
TableScan
- alias: y
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ alias: x
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -336,14 +336,14 @@ STAGE PLANS:
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
TableScan
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
Reduce Operator Tree:
Join Operator
@@ -370,26 +370,26 @@ STAGE PLANS:
Map Reduce
Map Operator Tree:
TableScan
- alias: x
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ alias: y
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -485,36 +485,36 @@ STAGE PLANS:
Map Reduce
Map Operator Tree:
TableScan
- alias: y
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ alias: x
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
TableScan
- alias: x
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ alias: y
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
Reduce Operator Tree:
Demux Operator
@@ -664,26 +664,26 @@ STAGE PLANS:
Map Reduce
Map Operator Tree:
TableScan
- alias: y
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ alias: x
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: -
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -699,14 +699,14 @@ STAGE PLANS:
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
TableScan
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
Reduce Operator Tree:
Join Operator
@@ -733,26 +733,26 @@ STAGE PLANS:
Map Reduce
Map Operator Tree:
TableScan
- alias: x
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ alias: y
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: -
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -797,25 +797,25 @@ STAGE PLANS:
Map Reduce
Map Operator Tree:
TableScan
- alias: y
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ alias: x
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -831,14 +831,14 @@ STAGE PLANS:
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
TableScan
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
Reduce Operator Tree:
Join Operator
@@ -865,25 +865,25 @@ STAGE PLANS:
Map Reduce
Map Operator Tree:
TableScan
- alias: x
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ alias: y
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -926,26 +926,26 @@ STAGE PLANS:
Map Reduce
Map Operator Tree:
TableScan
- alias: y
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ alias: x
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -961,14 +961,14 @@ STAGE PLANS:
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
TableScan
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
Reduce Operator Tree:
Join Operator
@@ -995,26 +995,26 @@ STAGE PLANS:
Map Reduce
Map Operator Tree:
TableScan
- alias: x
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ alias: y
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -1110,36 +1110,36 @@ STAGE PLANS:
Map Reduce
Map Operator Tree:
TableScan
- alias: y
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ alias: x
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
TableScan
- alias: x
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ alias: y
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
Reduce Operator Tree:
Demux Operator
@@ -1289,44 +1289,32 @@ STAGE PLANS:
Map Reduce
Map Operator Tree:
TableScan
- alias: y
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ alias: x
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: string)
- outputColumnNames: key
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- keys: key (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- keys: KEY._col0 (type: string)
- mode: mergepartial
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: bigint)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-2
Map Reduce
@@ -1336,15 +1324,15 @@ STAGE PLANS:
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
TableScan
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: string)
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
Reduce Operator Tree:
Join Operator
condition map:
@@ -1370,26 +1358,30 @@ STAGE PLANS:
Map Reduce
Map Operator Tree:
TableScan
- alias: x
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ alias: y
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string)
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: key (type: string)
+ mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: string)
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -1485,28 +1477,6 @@ STAGE PLANS:
Map Reduce
Map Operator Tree:
TableScan
- alias: y
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: key
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- keys: key (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
- TableScan
alias: x
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
@@ -1522,6 +1492,24 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
+ TableScan
+ alias: y
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: key (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
Reduce Operator Tree:
Demux Operator
Statistics: Num rows: 263 Data size: 2755 Basic stats: COMPLETE Column stats: NONE
@@ -1556,31 +1544,27 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: bigint)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE
- Mux Operator
- Statistics: Num rows: 394 Data size: 4127 Basic stats: COMPLETE Column stats: NONE
- Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0 {KEY.reducesinkkey0} {VALUE._col0}
- 1 {KEY.reducesinkkey0} {VALUE._col0}
+ Mux Operator
+ Statistics: Num rows: 394 Data size: 4127 Basic stats: COMPLETE Column stats: NONE
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {KEY.reducesinkkey0} {VALUE._col0}
+ 1 {KEY.reducesinkkey0} {VALUE._col0}
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: bigint)
- outputColumnNames: _col0, _col1, _col2, _col3
+ File Output Operator
+ compressed: false
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
diff --git a/ql/src/test/results/clientpositive/correlationoptimizer15.q.out b/ql/src/test/results/clientpositive/correlationoptimizer15.q.out
index 5186873..6b8cf50 100644
--- a/ql/src/test/results/clientpositive/correlationoptimizer15.q.out
+++ b/ql/src/test/results/clientpositive/correlationoptimizer15.q.out
@@ -23,14 +23,14 @@ JOIN src yy ON xx.key=yy.key ORDER BY xx.key, xx.cnt, yy.key
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-3 is a root stage
- Stage-4 depends on stages: Stage-3
- Stage-1 depends on stages: Stage-4
+ Stage-1 is a root stage
Stage-2 depends on stages: Stage-1
- Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-2
+ Stage-4 depends on stages: Stage-3
+ Stage-0 depends on stages: Stage-4
STAGE PLANS:
- Stage: Stage-3
+ Stage: Stage-1
Map Reduce
Map Operator Tree:
TableScan
@@ -64,24 +64,20 @@ STAGE PLANS:
1
outputColumnNames: _col0
Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string)
- outputColumnNames: _col0
+ Group By Operator
+ aggregations: count(1)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count(1)
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
- Stage: Stage-4
+ Stage: Stage-2
Map Reduce
Map Operator Tree:
TableScan
@@ -97,22 +93,25 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 7 Data size: 54 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: bigint)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 7 Data size: 54 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
- Stage: Stage-1
+ Stage: Stage-3
Map Reduce
Map Operator Tree:
TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ TableScan
alias: yy
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
@@ -123,13 +122,6 @@ STAGE PLANS:
sort order: +
Map-reduce partition columns: key (type: string)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- TableScan
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 7 Data size: 54 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
Reduce Operator Tree:
Join Operator
condition map:
@@ -139,18 +131,14 @@ STAGE PLANS:
1 {KEY.reducesinkkey0}
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
- Stage: Stage-2
+ Stage: Stage-4
Map Reduce
Map Operator Tree:
TableScan
@@ -255,18 +243,18 @@ STAGE PLANS:
Map Reduce
Map Operator Tree:
TableScan
- alias: yy
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ alias: x
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: key (type: string)
sort order: +
Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
TableScan
- alias: x
+ alias: y
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
@@ -277,16 +265,16 @@ STAGE PLANS:
Map-reduce partition columns: key (type: string)
Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
TableScan
- alias: y
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ alias: yy
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: key (type: string)
sort order: +
Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Reduce Operator Tree:
Demux Operator
Statistics: Num rows: 276 Data size: 2854 Basic stats: COMPLETE Column stats: NONE
@@ -298,43 +286,35 @@ STAGE PLANS:
1
outputColumnNames: _col0
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string)
- outputColumnNames: _col0
+ Mux Operator
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- Mux Operator
+ Group By Operator
+ aggregations: count(1)
+ keys: _col0 (type: string)
+ mode: complete
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- Group By Operator
- aggregations: count(1)
- keys: _col0 (type: string)
- mode: complete
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: bigint)
- outputColumnNames: _col0, _col1
+ Mux Operator
+ Statistics: Num rows: 276 Data size: 2854 Basic stats: COMPLETE Column stats: NONE
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {KEY.reducesinkkey0} {VALUE._col0}
+ 1 {KEY.reducesinkkey0}
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- Mux Operator
- Statistics: Num rows: 276 Data size: 2854 Basic stats: COMPLETE Column stats: NONE
- Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0 {KEY.reducesinkkey0} {VALUE._col0}
- 1 {KEY.reducesinkkey0}
- outputColumnNames: _col0, _col1, _col2
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ File Output Operator
+ compressed: false
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Mux Operator
Statistics: Num rows: 276 Data size: 2854 Basic stats: COMPLETE Column stats: NONE
Join Operator
diff --git a/ql/src/test/results/clientpositive/correlationoptimizer2.q.out b/ql/src/test/results/clientpositive/correlationoptimizer2.q.out
index 7073a30..8782ac1 100644
--- a/ql/src/test/results/clientpositive/correlationoptimizer2.q.out
+++ b/ql/src/test/results/clientpositive/correlationoptimizer2.q.out
@@ -36,44 +36,36 @@ STAGE PLANS:
Map Reduce
Map Operator Tree:
TableScan
- alias: y
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ alias: x
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: key, value
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count(value)
- keys: key (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(value)
+ keys: key (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: bigint)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-2
Map Reduce
@@ -83,14 +75,14 @@ STAGE PLANS:
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint)
TableScan
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint)
Reduce Operator Tree:
Join Operator
@@ -100,22 +92,18 @@ STAGE PLANS:
0 {KEY.reducesinkkey0} {VALUE._col0}
1 {KEY.reducesinkkey0} {VALUE._col0}
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint)
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3))
+ mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3))
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-3
Map Reduce
@@ -147,44 +135,36 @@ STAGE PLANS:
Map Reduce
Map Operator Tree:
TableScan
- alias: x
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ alias: y
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: key, value
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count(value)
- keys: key (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(value)
+ keys: key (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: bigint)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-0
Fetch Operator
@@ -235,49 +215,41 @@ STAGE PLANS:
Map Reduce
Map Operator Tree:
TableScan
- alias: y
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: key, value
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count(value)
- keys: key (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
- TableScan
alias: x
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: key, value
+ Group By Operator
+ aggregations: count(value)
+ keys: key (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count(value)
- keys: key (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
+ value expressions: _col1 (type: bigint)
+ TableScan
+ alias: y
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(value)
+ keys: key (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
Reduce Operator Tree:
Demux Operator
Statistics: Num rows: 263 Data size: 2755 Basic stats: COMPLETE Column stats: NONE
@@ -287,70 +259,54 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: bigint)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE
- Mux Operator
- Statistics: Num rows: 262 Data size: 2744 Basic stats: COMPLETE Column stats: NONE
- Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0 {KEY.reducesinkkey0} {VALUE._col0}
- 1 {KEY.reducesinkkey0} {VALUE._col0}
+ Mux Operator
+ Statistics: Num rows: 262 Data size: 2744 Basic stats: COMPLETE Column stats: NONE
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {KEY.reducesinkkey0} {VALUE._col0}
+ 1 {KEY.reducesinkkey0} {VALUE._col0}
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Group By Operator
+ aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3))
+ mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- Group By Operator
- aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3))
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Group By Operator
aggregations: count(VALUE._col0)
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: bigint)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE
- Mux Operator
- Statistics: Num rows: 262 Data size: 2744 Basic stats: COMPLETE Column stats: NONE
- Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0 {KEY.reducesinkkey0} {VALUE._col0}
- 1 {KEY.reducesinkkey0} {VALUE._col0}
+ Mux Operator
+ Statistics: Num rows: 262 Data size: 2744 Basic stats: COMPLETE Column stats: NONE
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {KEY.reducesinkkey0} {VALUE._col0}
+ 1 {KEY.reducesinkkey0} {VALUE._col0}
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Group By Operator
+ aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3))
+ mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- Group By Operator
- aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3))
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-2
Map Reduce
@@ -431,23 +387,23 @@ STAGE PLANS:
Map Reduce
Map Operator Tree:
TableScan
- alias: y
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ alias: x
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: key, value
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(value)
keys: key (type: string)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint)
Reduce Operator Tree:
Group By Operator
@@ -455,17 +411,13 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: bigint)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-2
Map Reduce
@@ -475,14 +427,14 @@ STAGE PLANS:
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint)
TableScan
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint)
Reduce Operator Tree:
Join Operator
@@ -492,22 +444,18 @@ STAGE PLANS:
0 {KEY.reducesinkkey0} {VALUE._col0}
1 {KEY.reducesinkkey0} {VALUE._col0}
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint)
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3))
+ mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: sum(hash(_col0)), sum(hash(_col1)),
sum(hash(_col2)), sum(hash(_col3)) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Map Reduce @@ -539,23 +487,23 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: x - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: y + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(value) keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator @@ -563,17 +511,13 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator @@ -624,42 +568,42 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: y - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: x + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: key, value - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group 
By Operator aggregations: count(value) keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) TableScan - alias: x - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: y + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(value) keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reduce Operator Tree: Demux Operator @@ -670,70 +614,54 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 262 Data size: 2746 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 262 Data size: 2746 Basic stats: COMPLETE Column stats: NONE - Mux Operator - Statistics: Num rows: 524 Data size: 5492 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} + Mux Operator + Statistics: Num rows: 524 Data size: 5492 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3)) + mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3)) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 262 Data size: 2746 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 262 Data size: 2746 Basic stats: COMPLETE Column stats: NONE - Mux Operator - Statistics: Num rows: 524 Data size: 5492 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} + Mux Operator + Statistics: Num rows: 524 Data size: 5492 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3)) + mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3)) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -814,23 +742,23 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: y - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: x + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: key, value - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(value) keys: key (type: string) mode: 
hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator @@ -838,17 +766,13 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -858,14 +782,14 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) TableScan Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reduce Operator Tree: Join Operator @@ -875,22 +799,18 @@ STAGE PLANS: 0 {KEY.reducesinkkey0} {VALUE._col0} 1 {KEY.reducesinkkey0} {VALUE._col0} outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3)) + mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3)) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 1 
Data size: 32 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Map Reduce @@ -922,23 +842,23 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: x - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: y + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(value) keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator @@ -946,17 +866,13 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator @@ -1007,42 +923,42 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: y - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: x + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: key, value - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(value) keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 191 Basic stats: 
COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) TableScan - alias: x - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: y + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(value) keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reduce Operator Tree: Demux Operator @@ -1053,70 +969,54 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 262 Data size: 2746 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 262 Data size: 2746 Basic stats: COMPLETE Column stats: NONE - Mux Operator - Statistics: Num rows: 524 Data size: 5492 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} + Mux Operator + Statistics: Num rows: 524 Data size: 5492 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3)) + mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3)) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + 
serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 262 Data size: 2746 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 262 Data size: 2746 Basic stats: COMPLETE Column stats: NONE - Mux Operator - Statistics: Num rows: 524 Data size: 5492 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} + Mux Operator + Statistics: Num rows: 524 Data size: 5492 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3)) + mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3)) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -1197,23 +1097,23 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: y - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: x + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: key, value - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(value) keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic 
stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator @@ -1221,17 +1121,13 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -1241,14 +1137,14 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) TableScan Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reduce Operator Tree: Join Operator @@ -1258,22 +1154,18 @@ STAGE PLANS: 0 {KEY.reducesinkkey0} {VALUE._col0} 1 {KEY.reducesinkkey0} {VALUE._col0} outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3)) + mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3)) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Map Reduce @@ -1305,23 +1197,23 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: x - Statistics: Num rows: 500 Data 
size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: y + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(value) keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator @@ -1329,17 +1221,13 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator @@ -1390,42 +1278,42 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: y - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: x + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: key, value - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(value) keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) TableScan - alias: x - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: y + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: key, value - 
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(value) keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reduce Operator Tree: Demux Operator @@ -1436,70 +1324,54 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 262 Data size: 2746 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 262 Data size: 2746 Basic stats: COMPLETE Column stats: NONE - Mux Operator - Statistics: Num rows: 524 Data size: 5492 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Outer Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3)) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 262 Data size: 2746 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 262 Data size: 2746 Basic stats: COMPLETE Column stats: NONE - Mux Operator - Statistics: Num rows: 524 Data size: 5492 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Outer Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} + Mux Operator + Statistics: Num rows: 524 Data size: 5492 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Outer Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3)) + mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: 
Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3)) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 262 Data size: 2746 Basic stats: COMPLETE Column stats: NONE + Mux Operator + Statistics: Num rows: 524 Data size: 5492 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Outer Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3)) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -1581,23 +1453,23 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: y - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: x + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: key, value - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(value) keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator @@ -1605,11 +1477,11 @@ STAGE PLANS: keys: KEY._col0 (type: string) 
mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -1625,13 +1497,13 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE TableScan Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -1641,22 +1513,18 @@ STAGE PLANS: 1 outputColumnNames: _col0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Map Reduce @@ -1675,21 +1543,17 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col1)) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col1)) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-4 Map Reduce @@ -1721,23 +1585,23 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: x - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: y + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(value) keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator @@ -1745,11 +1609,11 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -1817,42 +1681,42 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: y - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: x + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: key, value - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(value) keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) TableScan - alias: x - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: y + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(value) keys: key (type: string) mode: hash 
outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reduce Operator Tree: Demux Operator @@ -1877,22 +1741,18 @@ STAGE PLANS: 1 outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) @@ -1913,22 +1773,18 @@ STAGE PLANS: 1 outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -1947,21 +1803,17 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col1)) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col1)) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column 
stats: NONE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

   Stage: Stage-3
     Map Reduce
@@ -2040,9 +1892,9 @@ FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
-  Stage-2 depends on stages: Stage-1, Stage-4
+  Stage-2 depends on stages: Stage-1, Stage-5
   Stage-3 depends on stages: Stage-2
-  Stage-4 is a root stage
+  Stage-5 is a root stage
   Stage-0 depends on stages: Stage-3

 STAGE PLANS:
@@ -2050,44 +1902,43 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
           TableScan
-            alias: z
+            alias: x
             Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: key is not null (type: boolean)
               Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: key (type: string), value (type: string)
-                outputColumnNames: key, value
+              Reduce Output Operator
+                key expressions: key (type: string)
+                sort order: +
+                Map-reduce partition columns: key (type: string)
                 Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  aggregations: count(value)
-                  keys: key (type: string)
-                  mode: hash
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: string)
-                    Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col1 (type: bigint)
+                value expressions: value (type: string)
+          TableScan
+            alias: y
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: key is not null (type: boolean)
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: key (type: string)
+                sort order: +
+                Map-reduce partition columns: key (type: string)
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
       Reduce Operator Tree:
-        Group By Operator
-          aggregations: count(VALUE._col0)
-          keys: KEY._col0 (type: string)
-          mode: mergepartial
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          condition expressions:
+            0 {KEY.reducesinkkey0} {VALUE._col0}
+            1 
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: string), _col1 (type: bigint)
-            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

   Stage: Stage-2
     Map Reduce
@@ -2097,15 +1948,15 @@ STAGE PLANS:
               key expressions: _col0 (type: string)
               sort order: +
               Map-reduce partition columns: _col0 (type: string)
-              Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col1 (type: bigint)
+              Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: string)
           TableScan
             Reduce Output Operator
               key expressions: _col0 (type: string)
               sort order: +
               Map-reduce partition columns: _col0 (type: string)
-              Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col1 (type: string)
+              Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: bigint)
       Reduce Operator Tree:
         Join Operator
           condition map:
@@ -2115,21 +1966,17 @@ STAGE PLANS:
             1 {KEY.reducesinkkey0} {VALUE._col0}
           outputColumnNames: _col0, _col1, _col2, _col3
           Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: bigint)
+          Group By Operator
+            aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3))
+            mode: hash
             outputColumnNames: _col0, _col1, _col2, _col3
-            Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
-            Group By Operator
-              aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3))
-              mode: hash
-              outputColumnNames: _col0, _col1, _col2, _col3
-              Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE
-              File Output Operator
-                compressed: false
-                table:
-                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

   Stage: Stage-3
     Map Reduce
@@ -2157,51 +2004,40 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

-  Stage: Stage-4
+  Stage: Stage-5
     Map Reduce
       Map Operator Tree:
           TableScan
-            alias: x
+            alias: z
             Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: key is not null (type: boolean)
               Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
-              Reduce Output Operator
-                key expressions: key (type: string)
-                sort order: +
-                Map-reduce partition columns: key (type: string)
+              Group By Operator
+                aggregations: count(value)
+                keys: key (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1
                 Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
-                value expressions: value (type: string)
-          TableScan
-            alias: y
-            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-              Reduce Output Operator
-                key expressions: key (type: string)
-                sort order: +
-                Map-reduce partition columns: key (type: string)
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: bigint)
       Reduce Operator Tree:
-        Join Operator
-          condition map:
-               Inner Join 0 to 1
-          condition expressions:
-            0 {KEY.reducesinkkey0} {VALUE._col0}
-            1 
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
          outputColumnNames: _col0, _col1
-          Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: string), _col1 (type: string)
-            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

   Stage: Stage-0
     Fetch Operator
@@ -2252,28 +2088,6 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
           TableScan
-            alias: z
-            Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: key (type: string), value (type: string)
-                outputColumnNames: key, value
-                Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  aggregations: count(value)
-                  keys: key (type: string)
-                  mode: hash
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: string)
-                    Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col1 (type: bigint)
-          TableScan
             alias: x
             Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
@@ -2286,6 +2100,24 @@ STAGE PLANS:
                 Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
                 value expressions: value (type: string)
           TableScan
+            alias: z
+            Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: key is not null (type: boolean)
+              Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: count(value)
+                keys: key (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: bigint)
+          TableScan
             alias: y
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
@@ -2307,70 +2139,54 @@ STAGE PLANS:
             1 
           outputColumnNames: _col0, _col1
           Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: string), _col1 (type: string)
-            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-            Mux Operator
-              Statistics: Num rows: 138 Data size: 1427 Basic stats: COMPLETE Column stats: NONE
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                condition expressions:
-                  0 {KEY.reducesinkkey0} {VALUE._col0}
-                  1 {KEY.reducesinkkey0} {VALUE._col0}
+          Mux Operator
+            Statistics: Num rows: 138 Data size: 1427 Basic stats: COMPLETE Column stats: NONE
+            Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              condition expressions:
+                0 {KEY.reducesinkkey0} {VALUE._col0}
+                1 {KEY.reducesinkkey0} {VALUE._col0}
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+              Group By Operator
+                aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3))
+                mode: hash
                 outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: bigint)
-                  outputColumnNames: _col0, _col1, _col2, _col3
-                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                  Group By Operator
-                    aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3))
-                    mode: hash
-                    outputColumnNames: _col0, _col1, _col2, _col3
-                    Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      table:
-                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
         Group By Operator
           aggregations: count(VALUE._col0)
           keys: KEY._col0 (type: string)
           mode: mergepartial
           outputColumnNames: _col0, _col1
           Statistics: Num rows: 138 Data size: 1427 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: string), _col1 (type: bigint)
-            outputColumnNames: _col0, _col1
+          Mux Operator
             Statistics: Num rows: 138 Data size: 1427 Basic stats: COMPLETE Column stats: NONE
-            Mux Operator
-              Statistics: Num rows: 138 Data size: 1427 Basic stats: COMPLETE Column stats: NONE
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                condition expressions:
-                  0 {KEY.reducesinkkey0} {VALUE._col0}
-                  1 {KEY.reducesinkkey0} {VALUE._col0}
+            Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              condition expressions:
+                0 {KEY.reducesinkkey0} {VALUE._col0}
+                1 {KEY.reducesinkkey0} {VALUE._col0}
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+              Group By Operator
+                aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3))
+                mode: hash
                 outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: bigint)
-                  outputColumnNames: _col0, _col1, _col2, _col3
-                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                  Group By Operator
-                    aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3))
-                    mode: hash
-                    outputColumnNames: _col0, _col1, _col2, _col3
-                    Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      table:
-                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

   Stage: Stage-2
     Map Reduce
diff --git a/ql/src/test/results/clientpositive/correlationoptimizer3.q.out b/ql/src/test/results/clientpositive/correlationoptimizer3.q.out
index ccd18c4..c59c0cc 100644
--- a/ql/src/test/results/clientpositive/correlationoptimizer3.q.out
+++ b/ql/src/test/results/clientpositive/correlationoptimizer3.q.out
@@ -69,22 +69,18 @@ STAGE PLANS:
             1 
           outputColumnNames: _col0
           Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: string)
-            outputColumnNames: _col0
+          Group By Operator
+            aggregations: count(1)
+            keys: _col0 (type: string)
+            mode: hash
+            outputColumnNames: _col0, _col1
             Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-            Group By Operator
-              aggregations: count(1)
-              keys: _col0 (type: string)
-              mode: hash
-              outputColumnNames: _col0, _col1
-              Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-              File Output Operator
-                compressed: false
-                table:
-                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

   Stage: Stage-2
     Map Reduce
@@ -102,17 +98,13 @@ STAGE PLANS:
           keys: KEY._col0 (type: string)
           mode: mergepartial
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: string), _col1 (type: bigint)
-            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

   Stage: Stage-3
     Map Reduce
@@ -122,7 +114,7 @@ STAGE PLANS:
               key expressions: _col0 (type: string)
               sort order: +
               Map-reduce partition columns: _col0 (type: string)
-              Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
               value expressions: _col1 (type: bigint)
           TableScan
             Reduce Output Operator
               key expressions: _col0 (type: string)
               sort order: +
               Map-reduce partition columns: _col0 (type: string)
@@ -186,28 +178,28 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
          TableScan
-            alias: y
-            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            alias: x
+            Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: key is not null (type: boolean)
-              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: key (type: string)
                 sort order: +
                 Map-reduce partition columns: key (type: string)
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+                value expressions: value (type: string)
          TableScan
-            alias: x
-            Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+            alias: y
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: key is not null (type: boolean)
-              Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: key (type: string)
                 sort order: +
                 Map-reduce partition columns: key (type: string)
-                Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
-                value expressions: value (type: string)
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
       Reduce Operator Tree:
         Join Operator
           condition map:
@@ -217,16 +209,12 @@ STAGE PLANS:
             1 
           outputColumnNames: _col0, _col1
           Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: string), _col1 (type: string)
-            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

   Stage: Stage-0
     Fetch Operator
@@ -332,47 +320,39 @@ STAGE PLANS:
             1 
           outputColumnNames: _col0
           Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: string)
-            outputColumnNames: _col0
+          Mux Operator
             Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-            Mux Operator
+            Group By Operator
+              aggregations: count(1)
+              keys: _col0 (type: string)
+              mode: complete
+              outputColumnNames: _col0, _col1
               Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-              Group By Operator
-                aggregations: count(1)
-                keys: _col0 (type: string)
-                mode: complete
-                outputColumnNames: _col0, _col1
+              Mux Operator
                 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: string), _col1 (type: bigint)
-                  outputColumnNames: _col0, _col1
+                Join Operator
+                  condition map:
+                       Inner Join 0 to 1
+                  condition expressions:
+                    0 {KEY.reducesinkkey0} {VALUE._col0}
+                    1 {VALUE._col0}
+                  outputColumnNames: _col0, _col1, _col3
                   Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                  Mux Operator
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: bigint), _col3 (type: string)
+                    outputColumnNames: _col0, _col1, _col2
                     Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                    Join Operator
-                      condition map:
-                           Inner Join 0 to 1
-                      condition expressions:
-                        0 {KEY.reducesinkkey0} {VALUE._col0}
-                        1 {VALUE._col0}
-                      outputColumnNames: _col0, _col1, _col3
-                      Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                      Select Operator
-                        expressions: _col0 (type: string), _col1 (type: bigint), _col3 (type: string)
-                        outputColumnNames: _col0, _col1, _col2
-                        Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                        Group By Operator
-                          aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2))
-                          mode: hash
-                          outputColumnNames: _col0, _col1, _col2
-                          Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
-                          File Output Operator
-                            compressed: false
-                            table:
-                                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                    Group By Operator
+                      aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2))
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        table:
+                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
         Join Operator
           condition map:
               Inner Join 0 to 1
@@ -381,35 +361,31 @@ STAGE PLANS:
             1 
           outputColumnNames: _col0, _col1
           Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: string), _col1 (type: string)
-            outputColumnNames: _col0, _col1
+          Mux Operator
             Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-            Mux Operator
+            Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              condition expressions:
+                0 {KEY.reducesinkkey0} {VALUE._col0}
+                1 {VALUE._col0}
+              outputColumnNames: _col0, _col1, _col3
              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                condition expressions:
-                  0 {KEY.reducesinkkey0} {VALUE._col0}
-                  1 {VALUE._col0}
-                outputColumnNames: _col0, _col1, _col3
+              Select Operator
+                expressions: _col0 (type: string), _col1 (type: bigint), _col3 (type: string)
+                outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: string), _col1 (type: bigint), _col3 (type: string)
+                Group By Operator
+                  aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2))
+                  mode: hash
                   outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                  Group By Operator
-                    aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2))
-                    mode: hash
-                    outputColumnNames: _col0, _col1, _col2
-                    Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      table:
-                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                  Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

   Stage: Stage-2
     Map Reduce
@@ -544,22 +520,18 @@ STAGE PLANS:
             1 key (type: string)
           outputColumnNames: _col0
           Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: string)
-            outputColumnNames: _col0
+          Group By Operator
+            aggregations: count(1)
+            keys: _col0 (type: string)
+            mode: hash
+            outputColumnNames: _col0, _col1
             Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-            Group By Operator
-              aggregations: count(1)
-              keys: _col0 (type: string)
-              mode: hash
-              outputColumnNames: _col0, _col1
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
               Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-              Reduce Output Operator
-                key expressions: _col0 (type: string)
-                sort order: +
-                Map-reduce partition columns: _col0 (type: string)
-                Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-                value expressions: _col1 (type: bigint)
+              value expressions: _col1 (type: bigint)
          TableScan
            alias: y
            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -577,16 +549,12 @@ STAGE PLANS:
             1 key (type: string)
           outputColumnNames: _col0, _col1
           Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: string), _col1 (type: string)
-            outputColumnNames: _col0, _col1
+          Reduce Output Operator
+            key expressions: _col0 (type: string)
+            sort order: +
+            Map-reduce partition columns: _col0 (type: string)
             Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-            Reduce Output Operator
-              key expressions: _col0 (type: string)
-              sort order: +
-              Map-reduce partition columns: _col0 (type: string)
-              Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col1 (type: string)
+            value expressions: _col1 (type: string)
       Local Work:
         Map Reduce Local Work
       Reduce Operator Tree:
@@ -598,35 +566,31 @@ STAGE PLANS:
           mode: mergepartial
           outputColumnNames: _col0, _col1
           Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: string), _col1 (type: bigint)
-            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-            Mux Operator
-              Statistics: Num rows: 825 Data size: 8763 Basic stats: COMPLETE Column stats: NONE
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                condition expressions:
-                  0 {KEY.reducesinkkey0} {VALUE._col0}
-                  1 {VALUE._col0}
-                outputColumnNames: _col0, _col1, _col3
+          Mux Operator
+            Statistics: Num rows: 825 Data size: 8763 Basic stats: COMPLETE Column stats: NONE
+            Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              condition expressions:
+                0 {KEY.reducesinkkey0} {VALUE._col0}
+                1 {VALUE._col0}
+              outputColumnNames: _col0, _col1, _col3
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+              Select Operator
+                expressions: _col0 (type: string), _col1 (type: bigint), _col3 (type: string)
+                outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-              Select Operator
-                expressions: _col0 (type: string), _col1 (type: bigint), _col3 (type: string)
+                Group By Operator
+                  aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2))
+                  mode: hash
                   outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                Group By Operator
-                  aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2))
-                  mode: hash
-                  outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
-                  File Output Operator
-                    compressed: false
-                    table:
-                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                  Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
         Mux Operator
           Statistics: Num rows: 825 Data size: 8763 Basic stats: COMPLETE Column stats: NONE
           Join Operator
@@ -762,16 +726,12 @@ STAGE PLANS:
             1 
           outputColumnNames: _col0, _col1
           Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: string), _col1 (type: string)
-            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

   Stage: Stage-2
     Map Reduce
@@ -788,7 +748,7 @@ STAGE PLANS:
             key expressions: _col0 (type: string)
             sort order: +
             Map-reduce partition columns: _col0 (type: string)
-            Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
             value expressions: _col1 (type: bigint)
       Reduce Operator Tree:
         Join Operator
@@ -845,27 +805,27 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
          TableScan
-            alias: y
-            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            alias: x
+            Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: key is not null (type: boolean)
-              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: key (type: string)
                 sort order: +
                 Map-reduce partition columns: key (type: string)
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
          TableScan
-            alias: x
-            Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+            alias: y
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: key is not null (type: boolean)
-              Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: key (type: string)
                 sort order: +
                 Map-reduce partition columns: key (type: string)
-                Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
       Reduce Operator Tree:
         Join Operator
          condition map:
@@ -875,22 +835,18 @@ STAGE PLANS:
             1 
          outputColumnNames: _col0
           Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: string)
-            outputColumnNames: _col0
+          Group By Operator
+            aggregations: count(1)
+            keys: _col0 (type: string)
+            mode: hash
+            outputColumnNames: _col0, _col1
             Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-            Group By Operator
-              aggregations: count(1)
-              keys: _col0 (type: string)
-              mode: hash
-              outputColumnNames: _col0, _col1
-              Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-              File Output Operator
-                compressed: false
-                table:
-                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

   Stage: Stage-6
     Map Reduce
@@ -908,17 +864,13 @@ STAGE PLANS:
           keys: KEY._col0 (type: string)
           mode: mergepartial
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: string), _col1 (type: bigint)
-            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

   Stage: Stage-0
     Fetch Operator
@@ -1024,35 +976,31 @@ STAGE PLANS:
             1 
          outputColumnNames: _col0, _col1
           Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: string), _col1 (type: string)
-            outputColumnNames: _col0, _col1
+          Mux Operator
             Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-            Mux Operator
+            Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              condition expressions:
+                0 {VALUE._col0}
+                1 {KEY.reducesinkkey0} {VALUE._col0}
+              outputColumnNames: _col1, _col2, _col3
              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                condition expressions:
-                  0 {VALUE._col0}
-                  1 {KEY.reducesinkkey0} {VALUE._col0}
-                outputColumnNames: _col1, _col2, _col3
+              Select Operator
+                expressions: _col2 (type: string), _col3 (type: bigint), _col1 (type: string)
+                outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                Select Operator
-                  expressions: _col2 (type: string), _col3 (type: bigint), _col1 (type: string)
+                Group By Operator
+                  aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2))
+                  mode: hash
                   outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                  Group By Operator
-                    aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2))
-                    mode: hash
-                    outputColumnNames: _col0, _col1, _col2
-                    Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      table:
-                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                  Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
         Join Operator
          condition map:
               Inner Join 0 to 1
@@ -1061,47 +1009,39 @@ STAGE PLANS:
             1 
          outputColumnNames: _col0
           Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: string)
-            outputColumnNames: _col0
+          Mux Operator
            Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-            Mux Operator
+            Group By Operator
+              aggregations: count(1)
+              keys: _col0 (type: string)
+              mode: complete
+              outputColumnNames: _col0, _col1
              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-            Group By Operator
-              aggregations: count(1)
-              keys: _col0 (type: string)
-              mode: complete
-              outputColumnNames: _col0, _col1
+              Mux Operator
                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-              Select Operator
-                expressions: _col0 (type: string), _col1 (type: bigint)
-                outputColumnNames: _col0, _col1
+                Join Operator
+                  condition map:
+                       Inner Join 0 to 1
+                  condition expressions:
+                    0 {VALUE._col0}
+                    1 {KEY.reducesinkkey0} {VALUE._col0}
+                  outputColumnNames: _col1, _col2, _col3
                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-              Mux Operator
+                  Select Operator
+                    expressions: _col2 (type: string), _col3 (type: bigint), _col1 (type: string)
+                    outputColumnNames: _col0, _col1, _col2
                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                Join Operator
-                  condition map:
-                       Inner Join 0 to 1
-                  condition expressions:
-                    0 {VALUE._col0}
-                    1 {KEY.reducesinkkey0} {VALUE._col0}
-                  outputColumnNames: _col1, _col2, _col3
-                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                  Select Operator
-                    expressions: _col2 (type: string), _col3 (type: bigint), _col1 (type: string)
-                    outputColumnNames: _col0, _col1, _col2
-                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                    Group By Operator
-                      aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2))
-                      mode: hash
-                      outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
-                      File Output Operator
-                        compressed: false
-                        table:
-                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                    Group By Operator
+                      aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2))
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        table:
+                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

   Stage: Stage-2
     Map Reduce
@@ -1236,16 +1176,12 @@ STAGE PLANS:
             1 key (type: string)
           outputColumnNames: _col0, _col1
           Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: string), _col1 (type: string)
-            outputColumnNames: _col0, _col1
+          Reduce Output Operator
+            key expressions: _col0 (type: string)
+            sort order: +
+            Map-reduce partition columns: _col0 (type: string)
             Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-            Reduce Output Operator
-              key expressions: _col0 (type: string)
-              sort order: +
-              Map-reduce partition columns: _col0 (type: string)
-              Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col1 (type: string)
+            value expressions: _col1 (type: string)
          TableScan
            alias: y
            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -1263,22 +1199,18 @@ STAGE PLANS:
             1 key (type: string)
           outputColumnNames: _col0
           Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: string)
-            outputColumnNames: _col0
+          Group By Operator
+            aggregations: count(1)
+            keys: _col0 (type: string)
+            mode: hash
+            outputColumnNames: _col0, _col1
             Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-            Group By Operator
-              aggregations: count(1)
-              keys: _col0 (type: string)
-              mode: hash
-              outputColumnNames: _col0, _col1
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
              Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-              Reduce Output Operator
-                key expressions: _col0 (type: string)
-                sort order: +
-                Map-reduce partition columns: _col0 (type: string)
-                Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-                value expressions: _col1 (type: bigint)
+              value expressions: _col1 (type: bigint)
       Local Work:
        Map Reduce Local Work
       Reduce Operator Tree:
@@ -1315,35 +1247,31 @@ STAGE PLANS:
           mode: mergepartial
           outputColumnNames: _col0, _col1
           Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: string), _col1 (type: bigint)
-            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-            Mux Operator
-              Statistics: Num rows: 825 Data size: 8763 Basic stats: COMPLETE Column stats: NONE
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                condition expressions:
-                  0 {VALUE._col0}
-                  1 {KEY.reducesinkkey0} {VALUE._col0}
-                outputColumnNames: _col1, _col2, _col3
+          Mux Operator
+            Statistics: Num rows: 825 Data size: 8763 Basic stats: COMPLETE Column stats: NONE
+            Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              condition expressions:
+                0 {VALUE._col0}
+                1 {KEY.reducesinkkey0} {VALUE._col0}
+              outputColumnNames: _col1, _col2, _col3
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+              Select Operator
+                expressions: _col2 (type: string), _col3 (type: bigint), _col1 (type: string)
+                outputColumnNames: _col0, _col1, _col2
                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-              Select Operator
-                expressions: _col2 (type: string), _col3 (type: bigint), _col1 (type: string)
+                Group By Operator
+                  aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2))
+                  mode: hash
                  outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                Group By Operator
-                  aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2))
-                  mode: hash
-                  outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
-                  File Output Operator
-                    compressed: false
-                    table:
-                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                  Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

   Stage: Stage-3
     Map Reduce
diff --git a/ql/src/test/results/clientpositive/correlationoptimizer4.q.out b/ql/src/test/results/clientpositive/correlationoptimizer4.q.out
index 80496d6..7991eba 100644
--- a/ql/src/test/results/clientpositive/correlationoptimizer4.q.out
+++ b/ql/src/test/results/clientpositive/correlationoptimizer4.q.out
@@ -88,7 +88,7 @@ STAGE PLANS:
             Map-reduce partition columns: key (type: int)
             Statistics: Num rows: 4 Data size: 17 Basic stats: COMPLETE Column stats: NONE
          TableScan
-            alias: z
+            alias: y
            Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
              predicate: key is not null (type: boolean)
@@ -99,7 +99,7 @@ STAGE PLANS:
             Map-reduce partition columns: key (type: int)
             Statistics: Num rows: 4 Data size: 17 Basic stats: COMPLETE Column stats: NONE
          TableScan
-            alias: y
+            alias: z
            Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
              predicate: key is not null (type: boolean)
@@ -120,22 +120,18 @@ STAGE PLANS:
             2 
          outputColumnNames: _col5
           Statistics: Num rows: 8 Data size: 37 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col5 (type: int)
-            outputColumnNames: _col5
+          Group By Operator
+            aggregations: count(1)
+            keys: _col5 (type: int)
+            mode: hash
+            outputColumnNames: _col0, _col1
            Statistics: Num rows: 8 Data size: 37 Basic stats: COMPLETE Column stats: NONE
-            Group By Operator
-              aggregations: count(1)
-              keys: _col5 (type: int)
-              mode: hash
-              outputColumnNames: _col0, _col1
-              Statistics: Num rows: 8 Data size: 37 Basic stats: COMPLETE Column stats: NONE
-              File Output Operator
-                compressed: false
-                table:
-                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

   Stage: Stage-2
     Map Reduce
@@ -154,21 +150,17 @@ STAGE PLANS:
           mode: mergepartial
           outputColumnNames: _col0, _col1
           Statistics: Num rows: 4 Data size: 18 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: int), _col1 (type: bigint)
+          Group By Operator
+            aggregations: sum(hash(_col0)), sum(hash(_col1))
+            mode: hash
            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 4 Data size: 18 Basic stats: COMPLETE Column stats: NONE
-            Group By Operator
-              aggregations: sum(hash(_col0)), sum(hash(_col1))
-              mode: hash
-              outputColumnNames: _col0, _col1
-              Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
-              File Output Operator
-                compressed: false
-                table:
-                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

   Stage: Stage-3
     Map Reduce
@@ -254,7 +246,7 @@ STAGE PLANS:
             Map-reduce partition columns: key (type: int)
             Statistics: Num rows: 4 Data size: 17 Basic stats: COMPLETE Column stats: NONE
          TableScan
-            alias: z
+            alias: y
            Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
              predicate: key is not null (type: boolean)
@@ -265,7 +257,7 @@ STAGE PLANS:
             Map-reduce partition columns: key (type: int)
             Statistics: Num rows: 4 Data size: 17 Basic stats: COMPLETE Column stats: NONE
          TableScan
-            alias: y
+            alias: z
            Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
              predicate: key is not null (type: boolean)
@@ -288,33 +280,25 @@ STAGE PLANS:
             2 
          outputColumnNames: _col5
           Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-          Select Operator
-            expressions: _col5 (type: int)
-            outputColumnNames: _col5
+          Mux Operator
            Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-            Mux Operator
+            Group By Operator
+              aggregations: count(1)
+              keys: _col5 (type: int)
+              mode: complete
+              outputColumnNames: _col0, _col1
              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
              Group By Operator
-                aggregations: count(1)
-                keys: _col5 (type: int)
-                mode: complete
+                aggregations: sum(hash(_col0)), sum(hash(_col1))
+                mode: hash
                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: int), _col1 (type: bigint)
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                  Group By Operator
-                    aggregations: sum(hash(_col0)), sum(hash(_col1))
-                    mode: hash
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      table:
-                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

   Stage: Stage-2
     Map Reduce
@@ -454,22 +438,18 @@ STAGE PLANS:
             2 key (type: int)
          outputColumnNames: _col5
           Statistics: Num rows: 8 Data size: 37 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col5 (type: int)
-            outputColumnNames: _col5
+          Group By Operator
+            aggregations: count(1)
+            keys: _col5 (type: int)
+            mode: hash
+            outputColumnNames: _col0, _col1
            Statistics: Num rows: 8 Data size: 37 Basic stats: COMPLETE Column stats: NONE
-            Group By Operator
-              aggregations: count(1)
-              keys: _col5 (type: int)
-              mode: hash
-              outputColumnNames: _col0, _col1
+            Reduce Output Operator
+              key expressions: _col0 (type: int)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: int)
              Statistics: Num rows: 8 Data size: 37 Basic stats: COMPLETE Column stats: NONE
-              Reduce Output Operator
-                key expressions: _col0 (type: int)
-                sort order: +
-                Map-reduce partition columns: _col0 (type: int)
-                Statistics: Num rows: 8 Data size: 37 Basic stats: COMPLETE Column stats: NONE
-                value expressions: _col1 (type: bigint)
+              value expressions: _col1 (type: bigint)
       Local Work:
        Map Reduce Local Work
      Reduce Operator Tree:
@@ -479,21 +459,17 @@ STAGE PLANS:
           mode: mergepartial
           outputColumnNames: _col0, _col1
           Statistics: Num rows: 4 Data size: 18 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: int), _col1 (type: bigint)
+          Group By Operator
+            aggregations: sum(hash(_col0)), sum(hash(_col1))
+            mode: hash
            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 4 Data size: 18 Basic stats: COMPLETE Column stats: NONE
-            Group By Operator
-              aggregations: sum(hash(_col0)), sum(hash(_col1))
-              mode: hash
-              outputColumnNames: _col0, _col1
-              Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
-              File Output Operator
-                compressed: false
-                table:
-                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

   Stage: Stage-3
     Map Reduce
@@ -581,7 +557,7 @@ STAGE PLANS:
             Map-reduce partition columns: key (type: int)
             Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE
          TableScan
-            alias: z
+            alias: y
            Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE
            Reduce Output Operator
              key expressions: key (type: int)
@@ -589,7 +565,7 @@ STAGE PLANS:
             Map-reduce partition columns: key (type: int)
             Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE
          TableScan
-            alias: y
+            alias: z
            Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE
            Reduce Output Operator
              key expressions: key (type: int)
@@ -607,22 +583,18 @@ STAGE PLANS:
             2 
          outputColumnNames: _col0
           Statistics: Num rows: 15 Data size: 66 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: int)
-            outputColumnNames: _col0
+          Group By Operator
+            aggregations: count(1)
+            keys: _col0 (type: int)
+            mode: hash
+            outputColumnNames: _col0, _col1
            Statistics: Num rows: 15 Data size: 66 Basic stats: COMPLETE Column stats: NONE
-            Group By Operator
-              aggregations: count(1)
-              keys: _col0 (type: int)
-              mode: hash
-              outputColumnNames: _col0, _col1
-              Statistics: Num rows: 15 Data size: 66 Basic stats: COMPLETE Column stats: NONE
-              File Output Operator
-                compressed: false
-                table:
-                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

   Stage: Stage-2
     Map Reduce
@@ -641,21 +613,17 @@ STAGE PLANS:
           mode: mergepartial
           outputColumnNames: _col0, _col1
           Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: int), _col1 (type: bigint)
+          Group By Operator
+            aggregations: sum(hash(_col0)), sum(hash(_col1))
+            mode: hash
            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE
-            Group By Operator
-              aggregations: sum(hash(_col0)), sum(hash(_col1))
-              mode: hash
-              outputColumnNames: _col0, _col1
-              Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
-              File Output Operator
-                compressed: false
-                table:
-                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

   Stage: Stage-3
     Map Reduce
@@ -738,7 +706,7 @@ STAGE PLANS:
             Map-reduce partition columns: key (type: int)
             Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE
          TableScan
-            alias: z
+            alias: y
            Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE
            Reduce Output Operator
              key expressions: key (type: int)
@@ -746,7 +714,7 @@ STAGE PLANS:
             Map-reduce partition columns: key (type: int)
             Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE
          TableScan
-            alias: y
+            alias: z
            Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE
            Reduce Output Operator
              key expressions: key (type: int)
@@ -766,33 +734,25 @@ STAGE PLANS:
             2 
          outputColumnNames: _col0
           Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: int)
-            outputColumnNames: _col0
+          Mux Operator
            Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-            Mux Operator
+            Group By Operator
+              aggregations: count(1)
+              keys: _col0 (type: int)
+              mode: complete
+              outputColumnNames: _col0, _col1
              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
              Group By Operator
-                aggregations: count(1)
-                keys: _col0 (type: int)
-                mode: complete
+                aggregations: sum(hash(_col0)), sum(hash(_col1))
+                mode: hash
                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: int), _col1 (type: bigint)
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                  Group By Operator
-                    aggregations: sum(hash(_col0)), sum(hash(_col1))
-                    mode: hash
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      table:
-                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

   Stage: Stage-2
     Map Reduce
@@ -882,7 +842,7 @@ STAGE PLANS:
             Map-reduce partition columns: key (type: int)
             Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE
          TableScan
-            alias: z
+            alias: y
            Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE
            Reduce Output Operator
              key expressions: key (type: int)
@@ -890,7 +850,7 @@ STAGE PLANS:
             Map-reduce partition columns: key (type: int)
             Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE
          TableScan
-            alias: y
+            alias: z
            Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE
            Reduce Output Operator
              key expressions: key (type: int)
@@ -908,22 +868,18 @@ STAGE PLANS:
             2 
          outputColumnNames: _col5
           Statistics: Num rows: 15 Data size: 66 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col5 (type: int)
-            outputColumnNames: _col5
+          Group By Operator
+            aggregations: count(1)
+            keys: _col5 (type: int)
+            mode: hash
+            outputColumnNames: _col0, _col1
            Statistics: Num rows: 15 Data size: 66 Basic stats: COMPLETE Column stats: NONE
-            Group By Operator
-              aggregations: count(1)
-              keys: _col5 (type: int)
-              mode: hash
-              outputColumnNames: _col0, _col1
-              Statistics: Num rows: 15 Data size: 66 Basic stats: COMPLETE Column stats: NONE
-              File Output Operator
-                compressed: false
-                table:
-                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

   Stage: Stage-2
     Map Reduce
@@ -942,21 +898,17 @@ STAGE PLANS:
           mode: mergepartial
          outputColumnNames: _col0, _col1
           Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: int), _col1 (type: bigint)
+          Group By Operator
+            aggregations: sum(hash(_col0)), sum(hash(_col1))
+            mode: hash
            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE
-            Group By Operator
-              aggregations: sum(hash(_col0)), sum(hash(_col1))
-              mode: hash
-              outputColumnNames: _col0, _col1
-              Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
-              File Output Operator
-                compressed: false
-                table:
-                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

   Stage: Stage-3
     Map Reduce
@@ -1044,7 +996,7 @@ STAGE PLANS:
             Map-reduce partition columns: key (type: int)
             Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE
          TableScan
-            alias: z
+            alias: y
            Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE
            Reduce Output Operator
              key expressions: key (type: int)
@@ -1052,7 +1004,7 @@ STAGE PLANS:
             Map-reduce partition columns: key (type: int)
             Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE
          TableScan
-            alias: y
+            alias: z
            Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE
            Reduce Output Operator
              key expressions: key (type: int)
@@ -1070,22 +1022,18 @@ STAGE PLANS:
             2 {KEY.reducesinkkey0}
          outputColumnNames: _col10
           Statistics: Num rows: 15 Data size: 66 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col10 (type: int)
-            outputColumnNames: _col10
+          Group By Operator
+            aggregations: count(1)
+            keys: _col10 (type: int)
+            mode: hash
+            outputColumnNames: _col0, _col1
            Statistics: Num rows: 15 Data size: 66 Basic stats: COMPLETE Column stats: NONE
-            Group By Operator
-              aggregations: count(1)
-              keys: _col10 (type: int)
-              mode: hash
-              outputColumnNames: _col0, _col1
-              Statistics: Num rows: 15 Data size: 66 Basic stats: COMPLETE Column stats: NONE
-              File Output Operator
-                compressed: false
-                table:
-                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

   Stage: Stage-2
     Map Reduce
@@ -1104,21 +1052,17 @@ STAGE PLANS:
           mode: mergepartial
          outputColumnNames: _col0, _col1
           Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: int), _col1 (type: bigint)
+          Group By Operator
+            aggregations: sum(hash(_col0)), sum(hash(_col1))
+            mode: hash
            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE
-            Group By Operator
-              aggregations: sum(hash(_col0)), sum(hash(_col1))
-              mode: hash
-              outputColumnNames: _col0, _col1
-              Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
-              File Output Operator
-                compressed: false
-                table:
-                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

   Stage: Stage-3
     Map Reduce
@@ -1201,7 +1145,7 @@ STAGE PLANS:
             Map-reduce partition columns: key (type: int)
             Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE
          TableScan
-            alias: z
+            alias: y
            Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE
            Reduce Output Operator
              key expressions: key (type: int)
@@ -1209,7 +1153,7 @@ STAGE PLANS:
             Map-reduce partition columns: key (type: int)
             Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE
          TableScan
-            alias: y
+            alias: z
            Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE
            Reduce Output Operator
              key expressions: key (type: int)
@@ -1229,33 +1173,25 @@ STAGE PLANS:
             2 {KEY.reducesinkkey0}
          outputColumnNames: _col10
           Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-          Select Operator
-            expressions: _col10 (type: int)
-            outputColumnNames: _col10
+          Mux Operator
            Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-            Mux Operator
+            Group By Operator
+              aggregations: count(1)
+              keys: _col10 (type: int)
+              mode: complete
+              outputColumnNames: _col0, _col1
              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
              Group By Operator
-                aggregations: count(1)
-                keys: _col10 (type: int)
-                mode: complete
+                aggregations: sum(hash(_col0)), sum(hash(_col1))
+                mode: hash
                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: int), _col1 (type: bigint)
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                  Group By Operator
-                    aggregations: sum(hash(_col0)), sum(hash(_col1))
-                    mode: hash
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      table:
-                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

   Stage: Stage-2
     Map Reduce
@@ -1345,7 +1281,7 @@ STAGE PLANS:
             Map-reduce partition columns: key (type: int)
             Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE
          TableScan
-            alias: z
+            alias: y
            Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE
            Reduce Output Operator
              key expressions: key (type: int)
@@ -1353,7 +1289,7
@@ STAGE PLANS: Map-reduce partition columns: key (type: int) Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE TableScan - alias: y + alias: z Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) @@ -1371,22 +1307,18 @@ STAGE PLANS: 2 outputColumnNames: _col5 Statistics: Num rows: 15 Data size: 66 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col5 (type: int) - outputColumnNames: _col5 + Group By Operator + aggregations: count(1) + keys: _col5 (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 15 Data size: 66 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col5 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 15 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -1405,21 +1337,17 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: bigint) + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col1)) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col1)) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Map Reduce @@ -1507,7 +1435,7 @@ STAGE PLANS: Map-reduce partition columns: key (type: int) Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE TableScan - alias: z + alias: y Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) @@ -1515,7 +1443,7 @@ STAGE PLANS: Map-reduce partition columns: key (type: int) Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE TableScan - alias: y + alias: z Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) @@ -1533,22 +1461,18 @@ STAGE PLANS: 2 outputColumnNames: _col5 Statistics: Num rows: 15 Data size: 66 Basic stats: COMPLETE Column stats: NONE - Select Operator - 
expressions: _col5 (type: int) - outputColumnNames: _col5 + Group By Operator + aggregations: count(1) + keys: _col5 (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 15 Data size: 66 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col5 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 15 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -1567,21 +1491,17 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: bigint) + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col1)) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col1)) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Map Reduce @@ -1665,7 +1585,7 @@ STAGE PLANS: Map-reduce partition columns: key (type: int) Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE TableScan - alias: z + alias: y Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) @@ -1673,7 +1593,7 @@ STAGE PLANS: Map-reduce partition columns: key (type: int) Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE TableScan - alias: y + alias: z Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) @@ -1691,22 +1611,18 @@ STAGE PLANS: 2 outputColumnNames: _col5 Statistics: Num rows: 15 Data size: 66 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col5 (type: int) - outputColumnNames: _col5 + Group By Operator + aggregations: count(1) + keys: _col5 (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 15 Data size: 66 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col5 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 15 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator 
- compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -1725,21 +1641,17 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: bigint) + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col1)) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col1)) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Map Reduce diff --git a/ql/src/test/results/clientpositive/correlationoptimizer5.q.out b/ql/src/test/results/clientpositive/correlationoptimizer5.q.out index ba74fa2..5c8aaa8 100644 --- a/ql/src/test/results/clientpositive/correlationoptimizer5.q.out +++ b/ql/src/test/results/clientpositive/correlationoptimizer5.q.out @@ -110,57 +110,52 @@ ON b.key = d.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-4 + Stage-2 depends on stages: Stage-1, Stage-5 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 - Stage-4 is a root stage + Stage-5 is a root stage STAGE PLANS: Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - alias: m - Statistics: Num rows: 54 Data size: 216 Basic stats: COMPLETE Column stats: NONE + alias: x + Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 27 Data size: 108 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 27 Data size: 108 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE TableScan - alias: n - Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + alias: y + Statistics: Num rows: 1447 Data size: 5791 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 724 Data 
size: 2897 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE - value expressions: val (type: string) + Statistics: Num rows: 724 Data size: 2897 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {KEY.reducesinkkey0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col6 - Statistics: Num rows: 29 Data size: 118 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 29 Data size: 118 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + 1 + outputColumnNames: _col0 + Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -170,14 +165,14 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 29 Data size: 118 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE TableScan Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 29 Data size: 118 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -213,44 +208,45 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan - alias: x - Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + alias: m + Statistics: Num rows: 54 Data size: 216 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 27 Data size: 108 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 27 Data size: 108 Basic stats: COMPLETE Column stats: NONE TableScan - alias: y - Statistics: Num rows: 1447 Data size: 5791 Basic stats: COMPLETE Column stats: NONE + alias: n + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 724 Data size: 2897 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + 
Map-reduce partition columns: key (type: int) - Statistics: Num rows: 724 Data size: 2897 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + value expressions: val (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {KEY.reducesinkkey0} - 1 - outputColumnNames: _col0 - Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE + 1 {VALUE._col0} + outputColumnNames: _col0, _col6 + Statistics: Num rows: 29 Data size: 118 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE + expressions: _col0 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 118 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -314,50 +310,50 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: m - Statistics: Num rows: 54 Data size: 216 Basic stats: COMPLETE Column stats: NONE + alias: x + Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 27 Data size: 108 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 27 Data size: 108 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE TableScan - alias: x - Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + alias: y + Statistics: Num rows: 1447 Data size: 5791 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 724 Data size: 2897 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 724 Data size: 2897 Basic stats: COMPLETE Column stats: NONE TableScan - alias: n - Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + alias: m + Statistics: Num rows: 54 Data size: 216 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 27 Data size: 108 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE - value expressions: val (type: string) + Statistics: Num rows: 27 Data size: 108 Basic stats: COMPLETE Column stats: NONE TableScan - alias: y - Statistics: Num rows: 1447 Data size: 5791 Basic stats: COMPLETE Column stats: NONE + alias: n + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 724 Data size: 2897 Basic stats: 
COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 724 Data size: 2897 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + value expressions: val (type: string) Reduce Operator Tree: Demux Operator Statistics: Num rows: 1479 Data size: 6053 Basic stats: COMPLETE Column stats: NONE @@ -369,32 +365,28 @@ STAGE PLANS: 1 outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 + Mux Operator Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Mux Operator + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col3 + Select Operator + expressions: _col0 (type: int), _col3 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col3 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_co2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_co2 Join Operator condition map: Inner Join 0 to 1 @@ -509,21 +501,21 @@ STAGE PLANS: Stage: Stage-14 Map Reduce Local Work Alias -> Map Local Tables: - d:m + b:y Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - d:m + b:y TableScan - alias: m - Statistics: Num rows: 54 Data size: 216 Basic stats: COMPLETE Column stats: NONE + alias: y + Statistics: Num rows: 1447 Data size: 5791 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 27 Data size: 108 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 724 Data size: 2897 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator condition expressions: - 0 - 1 {val} + 0 {key} + 1 keys: 0 key (type: int) 1 key (type: int) @@ -532,32 +524,28 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: n - Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + alias: x + Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 
condition expressions: 0 {key} - 1 {val} + 1 keys: 0 key (type: int) 1 key (type: int) - outputColumnNames: _col0, _col6 - Statistics: Num rows: 29 Data size: 118 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 29 Data size: 118 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + outputColumnNames: _col0 + Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -567,11 +555,11 @@ STAGE PLANS: Stage: Stage-12 Map Reduce Local Work Alias -> Map Local Tables: - $INTNAME + $INTNAME1 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $INTNAME + $INTNAME1 TableScan HashTable Sink Operator condition expressions: @@ -624,11 +612,11 @@ STAGE PLANS: Stage: Stage-13 Map Reduce Local Work Alias -> Map Local Tables: - $INTNAME1 + $INTNAME Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $INTNAME1 + $INTNAME TableScan HashTable Sink Operator condition expressions: @@ -673,14 +661,14 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 29 Data size: 118 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE TableScan Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 29 Data size: 118 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -706,21 +694,21 @@ STAGE PLANS: Stage: Stage-15 Map Reduce Local Work Alias -> Map Local Tables: - b:y + d:m Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - b:y + d:m TableScan - alias: y - Statistics: Num rows: 1447 Data size: 5791 Basic stats: COMPLETE Column stats: NONE + alias: m + Statistics: Num rows: 54 Data size: 216 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 724 Data size: 2897 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 27 Data size: 108 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator condition expressions: - 0 {key} - 1 + 0 + 1 {val} keys: 0 key (type: int) 1 key (type: int) @@ -729,26 +717,26 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: x - Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + alias: n + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE Map Join Operator 
condition map: Inner Join 0 to 1 condition expressions: 0 {key} - 1 + 1 {val} keys: 0 key (type: int) 1 key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col6 + Statistics: Num rows: 29 Data size: 118 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE + expressions: _col0 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 118 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: diff --git a/ql/src/test/results/clientpositive/correlationoptimizer6.q.out b/ql/src/test/results/clientpositive/correlationoptimizer6.q.out index e1c46db..852fac3 100644 --- a/ql/src/test/results/clientpositive/correlationoptimizer6.q.out +++ b/ql/src/test/results/clientpositive/correlationoptimizer6.q.out @@ -38,26 +38,26 @@ STAGE PLANS: Map Operator Tree: TableScan alias: x - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE TableScan alias: y - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -66,23 +66,19 @@ STAGE PLANS: 0 {KEY.reducesinkkey0} 1 outputColumnNames: _col0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + 
Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -92,7 +88,7 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator @@ -100,17 +96,13 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Map Reduce @@ -120,14 +112,14 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) TableScan Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 7 Data size: 54 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reduce Operator Tree: Join Operator @@ -137,14 +129,14 @@ STAGE PLANS: 0 {KEY.reducesinkkey0} {VALUE._col0} 1 {KEY.reducesinkkey0} {VALUE._col0} outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -155,26 +147,26 @@ STAGE PLANS: Map Operator Tree: 
TableScan alias: x - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE TableScan alias: y - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -183,23 +175,19 @@ STAGE PLANS: 0 {KEY.reducesinkkey0} 1 outputColumnNames: _col0 - Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-6 Map Reduce @@ -209,7 +197,7 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator @@ -217,17 +205,13 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 54 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 
54 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator @@ -296,48 +280,48 @@ STAGE PLANS: Map Operator Tree: TableScan alias: x - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE TableScan alias: y - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE TableScan alias: x - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE TableScan alias: y - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Operator 
Tree: Demux Operator Statistics: Num rows: 526 Data size: 5510 Basic stats: COMPLETE Column stats: NONE @@ -349,43 +333,35 @@ STAGE PLANS: 1 outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Mux Operator Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Mux Operator + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1 + Mux Operator Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Mux Operator + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Join Operator condition map: Inner Join 0 to 1 @@ -394,43 +370,35 @@ STAGE PLANS: 1 outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Mux Operator Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Mux Operator + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1 + Mux Operator Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 
{KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Mux Operator + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -541,10 +509,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: y - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -555,29 +523,25 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) TableScan alias: y - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: 
NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -588,23 +552,19 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -616,62 +576,54 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 144 Data size: 1509 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 144 Data size: 1509 Basic stats: COMPLETE Column stats: NONE - Mux Operator - Statistics: Num rows: 288 Data size: 3018 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} + Mux Operator + Statistics: Num rows: 288 Data size: 3018 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 144 Data size: 1509 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 144 Data size: 1509 Basic stats: COMPLETE Column stats: NONE - Mux Operator - Statistics: Num rows: 288 Data size: 3018 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} + Mux Operator + Statistics: Num rows: 288 Data size: 3018 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -749,39 +701,31 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Group By Operator + aggregations: count(1) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reduce 
Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -791,7 +735,7 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) TableScan alias: xx @@ -812,14 +756,14 @@ STAGE PLANS: 0 {KEY.reducesinkkey0} 1 {KEY.reducesinkkey0} {VALUE._col0} outputColumnNames: _col0, _col5, _col6 - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col5 (type: string), _col6 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -892,22 +836,18 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Group By Operator + aggregations: count(1) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) TableScan alias: xx Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE 
Column stats: NONE @@ -949,31 +889,27 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE - Mux Operator - Statistics: Num rows: 394 Data size: 4127 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col5, _col6 + Mux Operator + Statistics: Num rows: 394 Data size: 4127 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col5, _col6 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col5 (type: string), _col6 (type: bigint) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col5 (type: string), _col6 (type: bigint) - outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1075,22 +1011,18 @@ STAGE PLANS: 1 outputColumnNames: _col0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -1108,17 +1040,13 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: 
Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Map Reduce @@ -1128,7 +1056,7 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) TableScan alias: xx @@ -1149,14 +1077,14 @@ STAGE PLANS: 0 {KEY.reducesinkkey0} 1 {KEY.reducesinkkey0} {VALUE._col0} outputColumnNames: _col0, _col5, _col6 - Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col5 (type: string), _col6 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1288,43 +1216,35 @@ STAGE PLANS: 1 outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Mux Operator Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Mux Operator + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 + Mux Operator + Statistics: Num rows: 513 Data size: 5411 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col5, _col6 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Mux Operator - Statistics: Num rows: 513 Data size: 5411 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col5 
(type: string), _col6 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col5 (type: string), _col6 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1386,13 +1306,13 @@ JOIN src yy ON xx.key=yy.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 Stage-3 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-3 - Stage-0 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -1426,24 +1346,20 @@ STAGE PLANS: 1 outputColumnNames: _col0 Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -1459,22 +1375,25 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 54 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 54 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + TableScan alias: yy Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -1485,13 +1404,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 7 Data size: 54 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) Reduce Operator Tree: Join Operator condition map: @@ -1597,18 +1509,18 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: yy - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE TableScan - alias: x + alias: y Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -1619,16 +1531,16 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE TableScan - alias: y - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: yy + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Demux Operator Statistics: Num rows: 276 Data size: 2854 Basic stats: COMPLETE Column stats: NONE @@ -1640,43 +1552,35 @@ STAGE PLANS: 1 outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Mux Operator Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Mux Operator + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: 
NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 + Mux Operator + Statistics: Num rows: 276 Data size: 2854 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Mux Operator - Statistics: Num rows: 276 Data size: 2854 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} - outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Mux Operator Statistics: Num rows: 276 Data size: 2854 Basic stats: COMPLETE Column stats: NONE Join Operator @@ -1783,13 +1687,13 @@ JOIN ON zz.key=yy.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 Stage-3 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-3 - Stage-0 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -1823,24 +1727,20 @@ STAGE PLANS: 1 outputColumnNames: _col0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -1856,38 +1756,23 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan - alias: zz - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - TableScan Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) TableScan alias: xx @@ -1900,6 +1785,17 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: zz + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -1910,14 +1806,14 @@ STAGE PLANS: 1 2 {KEY.reducesinkkey0} {VALUE._col0} outputColumnNames: _col0, _col10, _col11 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 605 Data size: 6426 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col10 (type: string), _col11 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 605 Data size: 6426 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic 
stats: COMPLETE Column stats: NONE + Statistics: Num rows: 605 Data size: 6426 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2017,7 +1913,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: zz + alias: x Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -2028,7 +1924,7 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE TableScan - alias: x + alias: y Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -2039,7 +1935,7 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE TableScan - alias: y + alias: zz Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -2094,45 +1990,37 @@ STAGE PLANS: 1 outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Mux Operator Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Mux Operator + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 + Mux Operator + Statistics: Num rows: 1526 Data size: 16134 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col10, _col11 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Mux Operator - Statistics: Num rows: 1526 Data size: 16134 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 - 2 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col10, _col11 + Select Operator + expressions: _col0 (type: string), _col10 (type: string), _col11 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col10 (type: string), _col11 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -2220,13 +2108,13 @@ ON xx.key=yy.key JOIN src zz ON yy.key=zz.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 Stage-3 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-3 - Stage-0 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -2260,24 +2148,20 @@ STAGE PLANS: 1 outputColumnNames: _col0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -2293,38 +2177,23 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan - alias: zz - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - TableScan Reduce Output Operator key expressions: _col0 (type: string) 
sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) TableScan alias: xx @@ -2337,6 +2206,17 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: zz + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -2347,14 +2227,14 @@ STAGE PLANS: 1 {KEY.reducesinkkey0} {VALUE._col0} 2 outputColumnNames: _col0, _col5, _col6 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 605 Data size: 6426 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col5 (type: string), _col6 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 605 Data size: 6426 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 605 Data size: 6426 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2448,7 +2328,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: zz + alias: x Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -2459,7 +2339,7 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE TableScan - alias: x + alias: y Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -2470,7 +2350,7 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE TableScan - alias: y + alias: zz Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -2525,45 +2405,37 @@ STAGE PLANS: 1 outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Mux Operator Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Mux Operator + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: 
NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 + Mux Operator + Statistics: Num rows: 1526 Data size: 16134 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 + outputColumnNames: _col0, _col5, _col6 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Mux Operator - Statistics: Num rows: 1526 Data size: 16134 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 - outputColumnNames: _col0, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col5 (type: string), _col6 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col5 (type: string), _col6 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -2654,10 +2526,10 @@ JOIN src z ON tmp.key=z.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-6 + Stage-2 depends on stages: Stage-1, Stage-5 Stage-3 depends on stages: Stage-2 Stage-4 depends on stages: Stage-3 - Stage-6 is a root stage + Stage-5 is a root stage Stage-0 depends on stages: Stage-4 STAGE PLANS: @@ -2665,44 +2537,36 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: y - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: x + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num 
rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -2712,14 +2576,14 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) TableScan Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reduce Operator Tree: Join Operator @@ -2729,23 +2593,19 @@ STAGE PLANS: 0 {KEY.reducesinkkey0} {VALUE._col0} 1 {VALUE._col0} outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col3 (type: bigint) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col1), sum(_col3) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col1), sum(_col3) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Map Reduce @@ -2755,7 +2615,7 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: bigint) Reduce Operator Tree: Group By Operator @@ -2763,17 +2623,13 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 68 Data size: 724 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 68 Data size: 724 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-4 Map Reduce @@ -2783,7 +2639,7 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 68 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: bigint) TableScan alias: z @@ -2805,61 +2661,53 @@ STAGE PLANS: 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} 1 {KEY.reducesinkkey0} {VALUE._col0} outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-6 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan - alias: x - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: y + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num 
rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator @@ -2953,27 +2801,23 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: y - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: x + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition 
columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) TableScan alias: z Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -2987,27 +2831,23 @@ STAGE PLANS: Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) TableScan - alias: x - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: y + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Demux Operator Statistics: Num rows: 513 Data size: 5411 Basic stats: COMPLETE Column stats: NONE @@ -3017,114 +2857,90 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 256 Data size: 2700 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 256 Data size: 2700 Basic stats: COMPLETE Column stats: NONE - Mux Operator - Statistics: Num rows: 512 Data size: 5400 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col3 + Mux Operator + Statistics: Num rows: 512 Data size: 5400 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Mux Operator Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col3 (type: bigint) - outputColumnNames: _col0, _col1, _col3 + Group By Operator + aggregations: sum(_col1), sum(_col3) + keys: _col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Mux Operator - Statistics: Num rows: 0 Data size: 0 Basic 
stats: NONE Column stats: NONE - Group By Operator - aggregations: sum(_col1), sum(_col3) - keys: _col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 513 Data size: 5411 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2 + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: string), _col4 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Mux Operator - Statistics: Num rows: 513 Data size: 5411 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: string), _col4 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 256 Data size: 2700 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 256 Data size: 2700 Basic stats: COMPLETE Column stats: NONE - Mux Operator - Statistics: Num rows: 512 Data size: 5400 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col3 + Mux Operator + Statistics: Num rows: 512 Data size: 5400 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Mux Operator Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col3 (type: bigint) - outputColumnNames: _col0, _col1, _col3 + Group By Operator 
+                aggregations: sum(_col1), sum(_col3)
+                keys: _col0 (type: string)
+                mode: complete
+                outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                 Mux Operator
-                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                  Group By Operator
-                    aggregations: sum(_col1), sum(_col3)
-                    keys: _col0 (type: string)
-                    mode: complete
-                    outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 513 Data size: 5411 Basic stats: COMPLETE Column stats: NONE
+                  Join Operator
+                    condition map:
+                         Inner Join 0 to 1
+                    condition expressions:
+                      0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1}
+                      1 {KEY.reducesinkkey0} {VALUE._col0}
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4
                     Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                     Select Operator
-                      expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: bigint)
-                      outputColumnNames: _col0, _col1, _col2
+                      expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: string), _col4 (type: string)
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4
                       Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                      Mux Operator
-                        Statistics: Num rows: 513 Data size: 5411 Basic stats: COMPLETE Column stats: NONE
-                        Join Operator
-                          condition map:
-                               Inner Join 0 to 1
-                          condition expressions:
-                            0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1}
-                            1 {KEY.reducesinkkey0} {VALUE._col0}
-                          outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                          Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                          Select Operator
-                            expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: string), _col4 (type: string)
-                            outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                            Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                            File Output Operator
-                              compressed: false
-                              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                              table:
-                                  input format: org.apache.hadoop.mapred.TextInputFormat
-                                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 Mux Operator
                   Statistics: Num rows: 513 Data size: 5411 Basic stats: COMPLETE Column stats: NONE
                   Join Operator
@@ -3251,80 +3067,71 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: x
-            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
              Reduce Output Operator
                key expressions: key (type: string)
                sort order: +
                Map-reduce partition columns: key (type: string)
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                value expressions: value (type: string)
+                Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
          TableScan
            alias: y
-            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
              Reduce Output Operator
                key expressions: key (type: string)
                sort order: +
                Map-reduce partition columns: key (type: string)
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
      Reduce Operator Tree:
        Join Operator
          condition map:
               Inner Join 0 to 1
          condition expressions:
-           0 {KEY.reducesinkkey0} {VALUE._col0}
+           0 {KEY.reducesinkkey0}
           1 
-          outputColumnNames: _col0, _col1
-          Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: string), _col1 (type: string)
+          outputColumnNames: _col0
+          Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE
+          Group By Operator
+            aggregations: count(1)
+            keys: _col0 (type: string)
+            mode: hash
            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-            Group By Operator
-              aggregations: count(1)
-              keys: _col0 (type: string), _col1 (type: string)
-              mode: hash
-              outputColumnNames: _col0, _col1, _col2
-              Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-              File Output Operator
-                compressed: false
-                table:
-                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
  Stage: Stage-2
    Map Reduce
      Map Operator Tree:
          TableScan
            Reduce Output Operator
-              key expressions: _col0 (type: string), _col1 (type: string)
-              sort order: ++
-              Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
-              Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col2 (type: bigint)
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: bigint)
      Reduce Operator Tree:
        Group By Operator
          aggregations: count(VALUE._col0)
-          keys: KEY._col0 (type: string), KEY._col1 (type: string)
+          keys: KEY._col0 (type: string)
          mode: mergepartial
-          outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
-            outputColumnNames: _col0, _col1, _col2
-            Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
  Stage: Stage-3
    Map Reduce
@@ -3334,15 +3141,15 @@ STAGE PLANS:
              key expressions: _col0 (type: string)
              sort order: +
              Map-reduce partition columns: _col0 (type: string)
-              Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col1 (type: string), _col2 (type: bigint)
+              Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: bigint)
          TableScan
            Reduce Output Operator
              key expressions: _col0 (type: string)
              sort order: +
              Map-reduce partition columns: _col0 (type: string)
-              Statistics: Num rows: 7 Data size: 54 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col1 (type: bigint)
+              Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: string), _col2 (type: bigint)
      Reduce Operator Tree:
        Join Operator
          condition map:
               Inner Join 0 to 1
          condition expressions:
            0 {KEY.reducesinkkey0} {VALUE._col0}
            1 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1}
          outputColumnNames: _col0, _col1, _col2, _col3, _col4
-          Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string), _col4 (type: bigint)
-            outputColumnNames: _col0, _col1, _col2, _col3, _col4
-            Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
  Stage: Stage-4
    Map Reduce
@@ -3370,15 +3173,15 @@ STAGE PLANS:
            Reduce Output Operator
              key expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string), _col4 (type: bigint)
              sort order: +++++
-              Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
      Reduce Operator Tree:
        Select Operator
          expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: bigint), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: bigint)
          outputColumnNames: _col0, _col1, _col2, _col3, _col4
-          Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
-            Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
            table:
                input format: org.apache.hadoop.mapred.TextInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -3389,79 +3192,72 @@ STAGE PLANS:
       Map Operator Tree:
          TableScan
            alias: x
-            Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
              Reduce Output Operator
                key expressions: key (type: string)
                sort order: +
                Map-reduce partition columns: key (type: string)
-                Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                value expressions: value (type: string)
          TableScan
            alias: y
-            Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
              predicate: key is not null (type: boolean)
-              Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
              Reduce Output Operator
                key expressions: key (type: string)
                sort order: +
                Map-reduce partition columns: key (type: string)
-                Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
      Reduce Operator Tree:
        Join Operator
          condition map:
               Inner Join 0 to 1
          condition expressions:
-           0 {KEY.reducesinkkey0}
+           0 {KEY.reducesinkkey0} {VALUE._col0}
           1 
-          outputColumnNames: _col0
-          Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: string)
-            outputColumnNames: _col0
-            Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE
-            Group By Operator
-              aggregations: count(1)
-              keys: _col0 (type: string)
-              mode: hash
-              outputColumnNames: _col0, _col1
-              Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE
-              File Output Operator
-                compressed: false
-                table:
-                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+          Group By Operator
+            aggregations: count(1)
+            keys: _col0 (type: string), _col1 (type: string)
+            mode: hash
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
  Stage: Stage-7
    Map Reduce
      Map Operator Tree:
          TableScan
            Reduce Output Operator
-              key expressions: _col0 (type: string)
-              sort order: +
-              Map-reduce partition columns: _col0 (type: string)
-              Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col1 (type: bigint)
+              key expressions: _col0 (type: string), _col1 (type: string)
+              sort order: ++
+              Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+              Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col2 (type: bigint)
      Reduce Operator Tree:
        Group By Operator
          aggregations: count(VALUE._col0)
-          keys: KEY._col0 (type: string)
+          keys: KEY._col0 (type: string), KEY._col1 (type: string)
          mode: mergepartial
-          outputColumnNames: _col0, _col1
-          Statistics: Num rows: 7 Data size: 54 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: string), _col1 (type: bigint)
-            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 7 Data size: 54 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
  Stage: Stage-0
    Fetch Operator
@@ -3521,13 +3317,13 @@ JOIN
 ON xx.key=yy.key
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-2 depends on stages: Stage-1
-  Stage-3 depends on stages: Stage-2
-  Stage-0 depends on stages: Stage-3
+  Stage-3 is a root stage
+  Stage-4 depends on stages: Stage-3
+  Stage-1 depends on stages: Stage-4
+  Stage-0 depends on stages: Stage-1
 STAGE PLANS:
-  Stage: Stage-1
+  Stage: Stage-3
    Map Reduce
      Map Operator Tree:
          TableScan
@@ -3562,24 +3358,20 @@ STAGE PLANS:
           1 
          outputColumnNames: _col0, _col1
          Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: string), _col1 (type: string)
-            outputColumnNames: _col0, _col1
+          Group By Operator
+            aggregations: count(1)
+            keys: _col0 (type: string), _col1 (type: string)
+            mode: hash
+            outputColumnNames: _col0, _col1, _col2
            Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-            Group By Operator
-              aggregations: count(1)
-              keys: _col0 (type: string), _col1 (type: string)
-              mode: hash
-              outputColumnNames: _col0, _col1, _col2
-              Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-              File Output Operator
-                compressed: false
-                table:
-                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-  Stage: Stage-2
+  Stage: Stage-4
    Map Reduce
      Map Operator Tree:
          TableScan
@@ -3595,29 +3387,18 @@ STAGE PLANS:
          keys: KEY._col0 (type: string), KEY._col1 (type: string)
          mode: mergepartial
          outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
-            outputColumnNames: _col0, _col1, _col2
-            Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-  Stage: Stage-3
+  Stage: Stage-1
    Map Reduce
      Map Operator Tree:
          TableScan
-            Reduce Output Operator
-              key expressions: _col0 (type: string)
-              sort order: +
-              Map-reduce partition columns: _col0 (type: string)
-              Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col1 (type: string), _col2 (type: bigint)
-          TableScan
            alias: x
            Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
@@ -3639,9 +3420,16 @@ STAGE PLANS:
                sort order: +
                Map-reduce partition columns: key (type: string)
                Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: string), _col2 (type: bigint)
      Reduce Operator Tree:
        Demux Operator
-          Statistics: Num rows: 163 Data size: 1653 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 301 Data size: 3119 Basic stats: COMPLETE Column stats: NONE
          Join Operator
            condition map:
                 Inner Join 0 to 1
@@ -3650,45 +3438,37 @@ STAGE PLANS:
             1 
            outputColumnNames: _col0
            Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-            Select Operator
-              expressions: _col0 (type: string)
-              outputColumnNames: _col0
+            Mux Operator
              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-              Mux Operator
+              Group By Operator
+                aggregations: count(1)
+                keys: _col0 (type: string)
+                mode: complete
+                outputColumnNames: _col0, _col1
                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                Group By Operator
-                  aggregations: count(1)
-                  keys: _col0 (type: string)
-                  mode: complete
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                  Select Operator
-                    expressions: _col0 (type: string), _col1 (type: bigint)
-                    outputColumnNames: _col0, _col1
+                Mux Operator
+                  Statistics: Num rows: 301 Data size: 3119 Basic stats: COMPLETE Column stats: NONE
+                  Join Operator
+                    condition map:
+                         Inner Join 0 to 1
+                    condition expressions:
+                      0 {KEY.reducesinkkey0} {VALUE._col0}
+                      1 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1}
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4
                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                    Mux Operator
-                      Statistics: Num rows: 163 Data size: 1653 Basic stats: COMPLETE Column stats: NONE
-                      Join Operator
-                        condition map:
-                             Inner Join 0 to 1
-                        condition expressions:
-                          0 {KEY.reducesinkkey0} {VALUE._col0}
-                          1 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1}
-                        outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string), _col4 (type: bigint)
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                      Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                      File Output Operator
+                        compressed: false
                        Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                      Select Operator
-                        expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string), _col4 (type: bigint)
-                        outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                        Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                        File Output Operator
-                          compressed: false
-                          Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                          table:
-                              input format: org.apache.hadoop.mapred.TextInputFormat
-                              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
            Mux Operator
-              Statistics: Num rows: 163 Data size: 1653 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 301 Data size: 3119 Basic stats: COMPLETE Column stats: NONE
              Join Operator
                condition map:
                     Inner Join 0 to 1
@@ -3768,10 +3548,10 @@ ON xx.key=yy.key
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-10 is a root stage
-  Stage-2 depends on stages: Stage-10
-  Stage-9 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-9
-  Stage-0 depends on stages: Stage-3
+  Stage-5 depends on stages: Stage-10
+  Stage-9 depends on stages: Stage-5
+  Stage-2 depends on stages: Stage-9
+  Stage-0 depends on stages: Stage-2
 STAGE PLANS:
   Stage: Stage-10
@@ -3796,7 +3576,7 @@ STAGE PLANS:
            0 key (type: string)
            1 key (type: string)
-  Stage: Stage-2
+  Stage: Stage-5
    Map Reduce
      Map Operator Tree:
          TableScan
@@ -3816,22 +3596,18 @@ STAGE PLANS:
            1 key (type: string)
          outputColumnNames: _col0, _col1
          Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: string), _col1 (type: string)
-            outputColumnNames: _col0, _col1
+          Group By Operator
+            aggregations: count(1)
+            keys: _col0 (type: string), _col1 (type: string)
+            mode: hash
+            outputColumnNames: _col0, _col1, _col2
            Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-            Group By Operator
-              aggregations: count(1)
-              keys: _col0 (type: string), _col1 (type: string)
-              mode: hash
-              outputColumnNames: _col0, _col1, _col2
+            Reduce Output Operator
+              key expressions: _col0 (type: string), _col1 (type: string)
+              sort order: ++
+              Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
              Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-              Reduce Output Operator
-                key expressions: _col0 (type: string), _col1 (type: string)
-                sort order: ++
-                Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
-                Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-                value expressions: _col2 (type: bigint)
+              value expressions: _col2 (type: bigint)
      Local Work:
        Map Reduce Local Work
      Reduce Operator Tree:
@@ -3840,17 +3616,13 @@ STAGE PLANS:
          keys: KEY._col0 (type: string), KEY._col1 (type: string)
          mode: mergepartial
          outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
-            outputColumnNames: _col0, _col1, _col2
-            Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
  Stage: Stage-9
    Map Reduce Local Work
@@ -3874,7 +3646,7 @@ STAGE PLANS:
            0 key (type: string)
            1 key (type: string)
-  Stage: Stage-3
+  Stage: Stage-2
    Map Reduce
      Map Operator Tree:
          TableScan
@@ -3882,7 +3654,7 @@ STAGE PLANS:
              key expressions: _col0 (type: string)
              sort order: +
              Map-reduce partition columns: _col0 (type: string)
-              Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
              value expressions: _col1 (type: string), _col2 (type: bigint)
          TableScan
            alias: y
@@ -3901,60 +3673,52 @@ STAGE PLANS:
            1 key (type: string)
          outputColumnNames: _col0
          Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: string)
-            outputColumnNames: _col0
+          Group By Operator
+            aggregations: count(1)
+            keys: _col0 (type: string)
+            mode: hash
+            outputColumnNames: _col0, _col1
            Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE
-            Group By Operator
-              aggregations: count(1)
-              keys: _col0 (type: string)
-              mode: hash
-              outputColumnNames: _col0, _col1
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
              Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE
-              Reduce Output Operator
-                key expressions: _col0 (type: string)
-                sort order: +
-                Map-reduce partition columns: _col0 (type: string)
-                Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE
-                value expressions: _col1 (type: bigint)
+              value expressions: _col1 (type: bigint)
      Local Work:
        Map Reduce Local Work
      Reduce Operator Tree:
        Demux Operator
-          Statistics: Num rows: 151 Data size: 1563 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 289 Data size: 3029 Basic stats: COMPLETE Column stats: NONE
          Group By Operator
            aggregations: count(VALUE._col0)
            keys: KEY._col0 (type: string)
            mode: mergepartial
            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 75 Data size: 776 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: _col0 (type: string), _col1 (type: bigint)
-              outputColumnNames: _col0, _col1
-              Statistics: Num rows: 75 Data size: 776 Basic stats: COMPLETE Column stats: NONE
-              Mux Operator
-                Statistics: Num rows: 226 Data size: 2339 Basic stats: COMPLETE Column stats: NONE
-                Join Operator
-                  condition map:
-                       Inner Join 0 to 1
-                  condition expressions:
-                    0 {KEY.reducesinkkey0} {VALUE._col0}
-                    1 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1}
+            Statistics: Num rows: 144 Data size: 1509 Basic stats: COMPLETE Column stats: NONE
+            Mux Operator
+              Statistics: Num rows: 433 Data size: 4538 Basic stats: COMPLETE Column stats: NONE
+              Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                condition expressions:
+                  0 {KEY.reducesinkkey0} {VALUE._col0}
+                  1 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1}
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string), _col4 (type: bigint)
                  outputColumnNames: _col0, _col1, _col2, _col3, _col4
                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                  Select Operator
-                    expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string), _col4 (type: bigint)
-                    outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                  File Output Operator
+                    compressed: false
                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                      table:
-                          input format: org.apache.hadoop.mapred.TextInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
          Mux Operator
-            Statistics: Num rows: 226 Data size: 2339 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 433 Data size: 4538 Basic stats: COMPLETE Column stats: NONE
            Join Operator
              condition map:
                   Inner Join 0 to 1
diff --git a/ql/src/test/results/clientpositive/correlationoptimizer7.q.out b/ql/src/test/results/clientpositive/correlationoptimizer7.q.out
index 17649e6..15c0eb2 100644
--- a/ql/src/test/results/clientpositive/correlationoptimizer7.q.out
+++ b/ql/src/test/results/clientpositive/correlationoptimizer7.q.out
@@ -19,14 +19,14 @@ JOIN src1 yy
 ON xx.key=yy.key
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-8 is a root stage
-  Stage-3 depends on stages: Stage-8
-  Stage-7 depends on stages: Stage-3
-  Stage-5 depends on stages: Stage-7
-  Stage-0 depends on stages: Stage-5
+  Stage-9 is a root stage
+  Stage-2 depends on stages: Stage-9
+  Stage-8 depends on stages: Stage-2
+  Stage-6 depends on stages: Stage-8
+  Stage-0 depends on stages: Stage-6
 STAGE PLANS:
-  Stage: Stage-8
+  Stage: Stage-9
    Map Reduce Local Work
      Alias -> Map Local Tables:
        xx:y
@@ -48,7 +48,7 @@ STAGE PLANS:
            0 key (type: string)
            1 key (type: string)
-  Stage: Stage-3
+  Stage: Stage-2
    Map Reduce
      Map Operator Tree:
          TableScan
@@ -68,22 +68,18 @@ STAGE PLANS:
            1 key (type: string)
          outputColumnNames: _col0
          Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: string)
-            outputColumnNames: _col0
+          Group By Operator
+            aggregations: count(1)
+            keys: _col0 (type: string)
+            mode: hash
+            outputColumnNames: _col0, _col1
            Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-            Group By Operator
-              aggregations: count(1)
-              keys: _col0 (type: string)
-              mode: hash
-              outputColumnNames: _col0, _col1
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
              Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-              Reduce Output Operator
-                key expressions: _col0 (type: string)
-                sort order: +
-                Map-reduce partition columns: _col0 (type: string)
-                Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-                value expressions: _col1 (type: bigint)
+              value expressions: _col1 (type: bigint)
      Local Work:
        Map Reduce Local Work
      Reduce Operator Tree:
@@ -92,19 +88,15 @@
          keys: KEY._col0 (type: string)
          mode: mergepartial
          outputColumnNames: _col0, _col1
-          Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: string), _col1 (type: bigint)
-            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-  Stage: Stage-7
+  Stage: Stage-8
    Map Reduce Local Work
      Alias -> Map Local Tables:
        yy
@@ -126,7 +118,7 @@ STAGE PLANS:
            0 _col0 (type: string)
            1 key (type: string)
-  Stage: Stage-5
+  Stage: Stage-6
    Map Reduce
      Map Operator Tree:
          TableScan
@@ -140,14 +132,14 @@ STAGE PLANS:
            0 _col0 (type: string)
            1 key (type: string)
          outputColumnNames: _col0, _col1, _col2, _col3
-          Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
          Select Operator
            expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string)
            outputColumnNames: _col0, _col1, _col2, _col3
-            Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
            File Output Operator
              compressed: false
-              Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
              table:
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -213,12 +205,12 @@ JOIN src1 yy
 ON xx.key=yy.key
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-5 is a root stage
-  Stage-1 depends on stages: Stage-5
-  Stage-0 depends on stages: Stage-1
+  Stage-6 is a root stage
+  Stage-2 depends on stages: Stage-6
+  Stage-0 depends on stages: Stage-2
 STAGE PLANS:
-  Stage: Stage-5
+  Stage: Stage-6
    Map Reduce Local Work
      Alias -> Map Local Tables:
        xx:y
@@ -240,7 +232,7 @@ STAGE PLANS:
            0 key (type: string)
            1 key (type: string)
-  Stage: Stage-1
+  Stage: Stage-2
    Map Reduce
      Map Operator Tree:
          TableScan
@@ -272,22 +264,18 @@ STAGE PLANS:
            1 key (type: string)
          outputColumnNames: _col0
          Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: string)
-            outputColumnNames: _col0
+          Group By Operator
+            aggregations: count(1)
+            keys: _col0 (type: string)
+            mode: hash
+            outputColumnNames: _col0, _col1
            Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-            Group By Operator
-              aggregations: count(1)
-              keys: _col0 (type: string)
-              mode: hash
-              outputColumnNames: _col0, _col1
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
              Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-              Reduce Output Operator
-                key expressions: _col0 (type: string)
-                sort order: +
-                Map-reduce partition columns: _col0 (type: string)
-                Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-                value expressions: _col1 (type: bigint)
+              value expressions: _col1 (type: bigint)
      Local Work:
        Map Reduce Local Work
      Reduce Operator Tree:
@@ -299,31 +287,27 @@ STAGE PLANS:
          mode: mergepartial
          outputColumnNames: _col0, _col1
          Statistics: Num rows: 144 Data size: 1510 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: string), _col1 (type: bigint)
-            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 144 Data size: 1510 Basic stats: COMPLETE Column stats: NONE
-            Mux Operator
-              Statistics: Num rows: 432 Data size: 4530 Basic stats: COMPLETE Column stats: NONE
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                condition expressions:
-                  0 {KEY.reducesinkkey0} {VALUE._col0}
-                  1 {KEY.reducesinkkey0} {VALUE._col0}
+          Mux Operator
+            Statistics: Num rows: 432 Data size: 4530 Basic stats: COMPLETE Column stats: NONE
+            Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              condition expressions:
+                0 {KEY.reducesinkkey0} {VALUE._col0}
+                1 {KEY.reducesinkkey0} {VALUE._col0}
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+              Select Operator
+                expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string)
                outputColumnNames: _col0, _col1, _col2, _col3
                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string)
-                  outputColumnNames: _col0, _col1, _col2, _col3
+                File Output Operator
+                  compressed: false
                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                  File Output Operator
-                    compressed: false
-                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                    table:
-                        input format: org.apache.hadoop.mapred.TextInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
          Mux Operator
            Statistics: Num rows: 432 Data size: 4530 Basic stats: COMPLETE Column stats: NONE
            Join Operator
@@ -416,14 +400,14 @@ JOIN src1 yy
 ON xx.key=yy.key
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-8 is a root stage
-  Stage-3 depends on stages: Stage-8
-  Stage-7 depends on stages: Stage-3
-  Stage-5 depends on stages: Stage-7
-  Stage-0 depends on stages: Stage-5
+  Stage-9 is a root stage
+  Stage-2 depends on stages: Stage-9
+  Stage-8 depends on stages: Stage-2
+  Stage-6 depends on stages: Stage-8
+  Stage-0 depends on stages: Stage-6
 STAGE PLANS:
-  Stage: Stage-8
+  Stage: Stage-9
    Map Reduce Local Work
      Alias -> Map Local Tables:
        xx:y
@@ -445,7 +429,7 @@ STAGE PLANS:
            0 key (type: string)
            1 key (type: string)
-  Stage: Stage-3
+  Stage: Stage-2
    Map Reduce
      Map Operator Tree:
          TableScan
@@ -465,22 +449,18 @@ STAGE PLANS:
            1 key (type: string)
          outputColumnNames: _col0
          Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: string)
-            outputColumnNames: _col0
+          Group By Operator
+            aggregations: count(1)
+            keys: _col0 (type: string)
+            mode: hash
+            outputColumnNames: _col0, _col1
            Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-            Group By Operator
-              aggregations: count(1)
-              keys: _col0 (type: string)
-              mode: hash
-              outputColumnNames: _col0, _col1
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
              Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-              Reduce Output Operator
-                key expressions: _col0 (type: string)
-                sort order: +
-                Map-reduce partition columns: _col0 (type: string)
-                Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-                value expressions: _col1 (type: bigint)
+              value expressions: _col1 (type: bigint)
      Local Work:
        Map Reduce Local Work
      Reduce Operator Tree:
@@ -489,19 +469,15 @@ STAGE PLANS:
          keys: KEY._col0 (type: string)
          mode: mergepartial
          outputColumnNames: _col0, _col1
-          Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: string), _col1 (type: bigint)
-            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-  Stage: Stage-7
+  Stage: Stage-8
    Map Reduce Local Work
      Alias -> Map Local Tables:
        yy
@@ -523,7 +499,7 @@ STAGE PLANS:
            0 _col0 (type: string)
            1 key (type: string)
-  Stage: Stage-5
+  Stage: Stage-6
    Map Reduce
      Map Operator Tree:
          TableScan
@@ -537,14 +513,14 @@ STAGE PLANS:
            0 _col0 (type: string)
            1 key (type: string)
          outputColumnNames: _col0, _col1, _col2, _col3
-          Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
          Select Operator
            expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string)
            outputColumnNames: _col0, _col1, _col2, _col3
-            Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
-            Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
            table:
                input format: org.apache.hadoop.mapred.TextInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -610,12 +586,12 @@ JOIN src1 yy
 ON xx.key=yy.key
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-5 is a root stage
-  Stage-1 depends on stages: Stage-5
-  Stage-0 depends on stages: Stage-1
+  Stage-6 is a root stage
+  Stage-2 depends on stages: Stage-6
+  Stage-0 depends on stages: Stage-2
 STAGE PLANS:
-  Stage: Stage-5
+  Stage: Stage-6
    Map Reduce Local Work
      Alias -> Map Local Tables:
        xx:y
@@ -637,7 +613,7 @@ STAGE PLANS:
            0 key (type: string)
            1 key (type: string)
-  Stage: Stage-1
+  Stage: Stage-2
    Map Reduce
      Map Operator Tree:
          TableScan
@@ -669,22 +645,18 @@ STAGE PLANS:
            1 key (type: string)
          outputColumnNames: _col0
          Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: string)
-            outputColumnNames: _col0
+          Group By Operator
+            aggregations: count(1)
+            keys: _col0 (type: string)
+            mode: hash
+            outputColumnNames: _col0, _col1
            Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-            Group By Operator
-              aggregations: count(1)
-              keys: _col0 (type: string)
-              mode: hash
-              outputColumnNames: _col0, _col1
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
              Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-              Reduce Output Operator
-                key expressions: _col0 (type: string)
-                sort order: +
-                Map-reduce partition columns: _col0 (type: string)
-                Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-                value expressions: _col1 (type: bigint)
+              value expressions: _col1 (type: bigint)
      Local Work:
        Map Reduce Local Work
      Reduce Operator Tree:
@@ -696,31 +668,27 @@ STAGE PLANS:
          mode: mergepartial
          outputColumnNames: _col0, _col1
          Statistics: Num rows: 144 Data size: 1510 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: string), _col1 (type: bigint)
-            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 144 Data size: 1510 Basic stats: COMPLETE Column stats: NONE
-            Mux Operator
-              Statistics: Num rows: 432 Data size: 4530 Basic stats: COMPLETE Column stats: NONE
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                condition expressions:
-                  0 {KEY.reducesinkkey0} {VALUE._col0}
-                  1 {KEY.reducesinkkey0} {VALUE._col0}
+          Mux Operator
+            Statistics: Num rows: 432 Data size: 4530 Basic stats: COMPLETE Column stats: NONE
+            Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              condition expressions:
+                0 {KEY.reducesinkkey0} {VALUE._col0}
+                1 {KEY.reducesinkkey0} {VALUE._col0}
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+              Select Operator
+                expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string)
                outputColumnNames: _col0, _col1, _col2, _col3
                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string)
-                  outputColumnNames: _col0, _col1, _col2, _col3
+                File Output Operator
+                  compressed: false
                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                  File Output Operator
-                    compressed: false
-                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                    table:
-                        input format: org.apache.hadoop.mapred.TextInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
          Mux Operator
            Statistics: Num rows: 432 Data size: 4530 Basic stats: COMPLETE Column stats: NONE
            Join Operator
diff --git a/ql/src/test/results/clientpositive/correlationoptimizer8.q.out b/ql/src/test/results/clientpositive/correlationoptimizer8.q.out
index 3c737d5..2163365 100644
--- a/ql/src/test/results/clientpositive/correlationoptimizer8.q.out
+++ b/ql/src/test/results/clientpositive/correlationoptimizer8.q.out
@@ -48,22 +48,18 @@ STAGE PLANS:
            Filter Operator
              predicate: (key < 20) (type: boolean)
              Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: key (type: string)
-                outputColumnNames: key
+              Group By Operator
+                aggregations: count(1)
+                keys: key (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1
                Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  aggregations: count(1)
-                  keys: key (type: string)
-                  mode: hash
-                  outputColumnNames: _col0, _col1
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
                  Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: string)
-                    Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col1 (type: bigint)
+                  value expressions: _col1 (type: bigint)
      Reduce Operator Tree:
        Group By Operator
          aggregations: count(VALUE._col0)
@@ -71,16 +67,12 @@ STAGE PLANS:
          mode: mergepartial
          outputColumnNames: _col0, _col1
          Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: string), _col1 (type: bigint)
-            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
  Stage: Stage-2
    Map Reduce
@@ -145,22 +137,18 @@ STAGE PLANS:
            Filter Operator
              predicate: (key > 100) (type: boolean)
              Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: key (type: string)
-                outputColumnNames: key
+              Group By Operator
+                aggregations: count(1)
+                keys: key (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1
                Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  aggregations: count(1)
-                  keys: key (type: string)
-                  mode: hash
-                  outputColumnNames: _col0, _col1
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
                  Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: string)
-                    Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col1 (type: bigint)
+                  value expressions: _col1 (type: bigint)
      Reduce Operator Tree:
        Group By Operator
          aggregations: count(VALUE._col0)
@@ -168,16 +156,12 @@ STAGE PLANS:
          mode: mergepartial
          outputColumnNames: _col0, _col1
          Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: string), _col1 (type: bigint)
-            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
  Stage: Stage-0
    Fetch Operator
@@ -252,44 +236,36 @@ STAGE PLANS:
            Filter Operator
              predicate: (key < 20) (type: boolean)
              Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: key (type: string)
-                outputColumnNames: key
+              Group By Operator
+                aggregations: count(1)
+                keys: key (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1
                Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  aggregations: count(1)
-                  keys: key (type: string)
-                  mode: hash
-                  outputColumnNames: _col0, _col1
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
                  Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: string)
-                    Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col1 (type: bigint)
+                  value expressions: _col1 (type: bigint)
          TableScan
            alias: x1
            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
              predicate: (key > 100) (type: boolean)
              Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: key (type: string)
-                outputColumnNames: key
+              Group By Operator
+                aggregations: count(1)
+                keys: key (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1
                Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  aggregations: count(1)
-                  keys: key (type: string)
-                  mode: hash
-                  outputColumnNames: _col0, _col1
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
                  Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: string)
-                    Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col1 (type: bigint)
+                  value expressions: _col1 (type: bigint)
          TableScan
            alias: x
            Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
@@ -311,66 +287,58 @@ STAGE PLANS:
          mode: mergepartial
          outputColumnNames: _col0, _col1
          Statistics: Num rows: 172 Data size: 1807 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: string), _col1 (type: bigint)
-            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 172 Data size: 1807 Basic stats: COMPLETE Column stats: NONE
-            Union
-              Statistics: Num rows: 344 Data size: 3614 Basic stats: COMPLETE Column stats: NONE
-              Mux Operator
-                Statistics: Num rows: 689 Data size: 7239 Basic stats: COMPLETE Column stats: NONE
-                Join Operator
-                  condition map:
-                       Inner Join 0 to 1
-                  condition expressions:
-                    0 {VALUE._col0}
-                    1 {KEY.reducesinkkey0} {VALUE._col0}
-                  outputColumnNames: _col1, _col2, _col3
+          Union
+            Statistics: Num rows: 344 Data size: 3614 Basic stats: COMPLETE Column stats: NONE
+            Mux Operator
+              Statistics: Num rows: 689 Data size: 7239 Basic stats: COMPLETE Column stats: NONE
+              Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                condition expressions:
+                  0 {VALUE._col0}
+                  1 {KEY.reducesinkkey0} {VALUE._col0}
+                outputColumnNames: _col1, _col2, _col3
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                Select Operator
+                  expressions: _col2 (type: string), _col3 (type: string), _col1 (type: bigint)
+                  outputColumnNames: _col0, _col1, _col2
                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                  Select Operator
-                    expressions: _col2 (type: string), _col3 (type: string), _col1 (type: bigint)
-                    outputColumnNames: _col0, _col1, _col2
+                  File Output Operator
+                    compressed: false
                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                      table:
-                          input format: org.apache.hadoop.mapred.TextInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
        Group By Operator
          aggregations: count(VALUE._col0)
          keys: KEY._col0 (type: string)
          mode: mergepartial
          outputColumnNames: _col0, _col1
          Statistics: Num rows: 172 Data size: 1807 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: string), _col1 (type: bigint)
-            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 172 Data size: 1807 Basic stats: COMPLETE Column stats: NONE
-            Union
-              Statistics: Num rows: 344 Data size: 3614 Basic stats: COMPLETE Column stats: NONE
-              Mux Operator
-                Statistics: Num rows: 689 Data size: 7239 Basic stats: COMPLETE Column stats: NONE
-                Join Operator
-                  condition map:
-                       Inner Join 0 to 1
-                  condition expressions:
-                    0 {VALUE._col0}
-                    1 {KEY.reducesinkkey0} {VALUE._col0}
-                  outputColumnNames: _col1, _col2, _col3
+          Union
+            Statistics: Num rows: 344 Data size: 3614 Basic stats: COMPLETE Column stats: NONE
+            Mux Operator
+              Statistics: Num rows: 689 Data size: 7239 Basic stats: COMPLETE Column stats: NONE
+              Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                condition expressions:
+                  0 {VALUE._col0}
+                  1 {KEY.reducesinkkey0} {VALUE._col0}
+                outputColumnNames: _col1, _col2, _col3
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                Select Operator
+                  expressions: _col2 (type: string), _col3 (type: string), _col1 (type: bigint)
+                  outputColumnNames: _col0, _col1, _col2
                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                  Select Operator
-                    expressions: _col2 (type: string), _col3 (type: string), _col1 (type: bigint)
-                    outputColumnNames: _col0, _col1, _col2
+                  File Output Operator
+                    compressed: false
                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                      table:
-                          input format: org.apache.hadoop.mapred.TextInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
          Mux Operator
            Statistics: Num rows: 689 Data size: 7239 Basic stats: COMPLETE Column stats: NONE
            Join Operator
@@ -480,22 +448,18 @@ STAGE PLANS:
            Filter Operator
              predicate: (key < 20) (type: boolean)
              Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: key (type: string)
-                outputColumnNames: key
+              Group By Operator
+                aggregations: count(1)
+                keys: key (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1
                Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  aggregations: count(1)
-                  keys: key (type: string)
-                  mode: hash
-                  outputColumnNames: _col0, _col1
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
                  Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: string)
-                    Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col1 (type: bigint)
+                  value expressions: _col1 (type: bigint)
      Reduce Operator Tree:
        Group By Operator
          aggregations: count(VALUE._col0)
@@ -503,16 +467,12 @@ STAGE PLANS:
          mode: mergepartial
          outputColumnNames: _col0, _col1
          Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: string), _col1 (type: bigint)
-            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
  Stage: Stage-2
    Map Reduce
@@ -597,16 +557,12 @@ STAGE PLANS:
          mode: mergepartial
          outputColumnNames: _col0, _col1
          Statistics: Num rows: 4 Data size: 30 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: string), _col1 (type: bigint)
-            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 4 Data size: 30 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
  Stage: Stage-0
    Fetch Operator
@@ -701,22 +657,18 @@ STAGE PLANS:
            Filter Operator
              predicate: (key < 20) (type: boolean)
              Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: key (type: string)
-                outputColumnNames: key
+              Group By Operator
+                aggregations: count(1)
+                keys: key (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1
                Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  aggregations: count(1)
-                  keys: key (type: string)
-                  mode: hash
-                  outputColumnNames: _col0, _col1
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
                  Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: string)
-                    Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col1 (type: bigint)
+                  value expressions: _col1 (type: bigint)
          TableScan
            alias: x1
            Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
@@ -757,66 +709,58 @@ STAGE PLANS:
          mode: mergepartial
          outputColumnNames: _col0, _col1
          Statistics: Num rows: 99 Data size: 1002 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: string), _col1 (type: bigint)
-            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 99 Data size: 1002 Basic stats: COMPLETE Column stats: NONE
-            Union
-              Statistics: Num rows: 198 Data size: 2004 Basic stats: COMPLETE Column stats: NONE
-              Mux Operator
-                Statistics: Num rows: 397 Data size: 4019 Basic stats: COMPLETE Column stats: NONE
-                Join Operator
-                  condition map:
-                       Left Outer Join0 to 1
-                  condition expressions:
-                    0 {KEY.reducesinkkey0} {VALUE._col0}
-                    1 {KEY.reducesinkkey0} {VALUE._col0}
+          Union
+            Statistics: Num rows: 198 Data size: 2004 Basic stats: COMPLETE Column stats: NONE
+            Mux Operator
+              Statistics: Num rows: 397 Data size: 4019 Basic stats: COMPLETE Column stats: NONE
+              Join Operator
+                condition map:
+                     Left Outer Join0 to 1
+                condition expressions:
+                  0 {KEY.reducesinkkey0} {VALUE._col0}
+                  1 {KEY.reducesinkkey0} {VALUE._col0}
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string)
                  outputColumnNames: _col0, _col1, _col2, _col3
                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                  Select Operator
-                    expressions: _col0 (type: string), _col1 (type: bigint)
(type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 99 Data size: 1002 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 198 Data size: 2004 Basic stats: COMPLETE Column stats: NONE - Mux Operator - Statistics: Num rows: 397 Data size: 4019 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} + Union + Statistics: Num rows: 198 Data size: 2004 Basic stats: COMPLETE Column stats: NONE + Mux Operator + Statistics: Num rows: 397 Data size: 4019 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Mux Operator Statistics: Num rows: 397 Data size: 4019 Basic stats: COMPLETE Column stats: NONE Join Operator @@ -940,22 +884,18 @@ STAGE PLANS: Filter Operator predicate: (key < 20) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Group By Operator + aggregations: count(1) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -963,16 +903,12 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, 
-            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

   Stage: Stage-2
     Map Reduce
@@ -1037,22 +973,18 @@ STAGE PLANS:
           Filter Operator
             predicate: (key > 100) (type: boolean)
             Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: key (type: string), value (type: string)
-              outputColumnNames: key, value
+            Group By Operator
+              aggregations: count(1)
+              keys: key (type: string), value (type: string)
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2
              Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-              Group By Operator
-                aggregations: count(1)
-                keys: key (type: string), value (type: string)
-                mode: hash
-                outputColumnNames: _col0, _col1, _col2
+              Reduce Output Operator
+                key expressions: _col0 (type: string), _col1 (type: string)
+                sort order: ++
+                Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
                Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string), _col1 (type: string)
-                  sort order: ++
-                  Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
-                  Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col2 (type: bigint)
+                value expressions: _col2 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
          aggregations: count(VALUE._col0)
@@ -1117,22 +1049,18 @@ STAGE PLANS:
           Filter Operator
             predicate: (key < 20) (type: boolean)
             Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: key (type: string)
-              outputColumnNames: key
+            Group By Operator
+              aggregations: count(1)
+              keys: key (type: string)
+              mode: hash
+              outputColumnNames: _col0, _col1
              Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-              Group By Operator
-                aggregations: count(1)
-                keys: key (type: string)
-                mode: hash
-                outputColumnNames: _col0, _col1
+              Reduce Output Operator
+                key expressions: _col0 (type: string)
+                sort order: +
+                Map-reduce partition columns: _col0 (type: string)
                Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col1 (type: bigint)
+                value expressions: _col1 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
          aggregations: count(VALUE._col0)
@@ -1211,22 +1139,18 @@ STAGE PLANS:
           Filter Operator
             predicate: (key > 100) (type: boolean)
             Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: key (type: string)
-              outputColumnNames: key
+            Group By Operator
+              aggregations: count(1)
+              keys: key (type: string)
+              mode: hash
+              outputColumnNames: _col0, _col1
              Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-              Group By Operator
-                aggregations: count(1)
-                keys: key (type: string)
-                mode: hash
-                outputColumnNames: _col0, _col1
+              Reduce Output Operator
+                key expressions: _col0 (type: string)
+                sort order: +
+                Map-reduce partition columns: _col0 (type: string)
                Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col1 (type: bigint)
+                value expressions: _col1 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
          aggregations: count(VALUE._col0)
diff --git a/ql/src/test/results/clientpositive/correlationoptimizer9.q.out b/ql/src/test/results/clientpositive/correlationoptimizer9.q.out
index 864bb21..ea0bef3 100644
--- a/ql/src/test/results/clientpositive/correlationoptimizer9.q.out
+++ b/ql/src/test/results/clientpositive/correlationoptimizer9.q.out
@@ -51,44 +51,36 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
           TableScan
-            alias: x1
+            alias: x
             Statistics: Num rows: 1028 Data size: 22964 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: (c2 > 100) (type: boolean)
+              predicate: (c1 < 120) (type: boolean)
               Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: c2 (type: int)
-                outputColumnNames: c2
+              Group By Operator
+                aggregations: count(1)
+                keys: c1 (type: int)
+                mode: hash
+                outputColumnNames: _col0, _col1
                Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  aggregations: count(1)
-                  keys: c2 (type: int)
-                  mode: hash
-                  outputColumnNames: _col0, _col1
+                Reduce Output Operator
+                  key expressions: _col0 (type: int)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: int)
                  Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: int)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: int)
-                    Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col1 (type: bigint)
+                  value expressions: _col1 (type: bigint)
       Reduce Operator Tree:
        Group By Operator
          aggregations: count(VALUE._col0)
          keys: KEY._col0 (type: int)
          mode: mergepartial
          outputColumnNames: _col0, _col1
-          Statistics: Num rows: 171 Data size: 3819 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: int), _col1 (type: bigint)
-            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 171 Data size: 3819 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

   Stage: Stage-2
     Map Reduce
@@ -98,14 +90,14 @@ STAGE PLANS:
             key expressions: _col0 (type: int)
             sort order: +
             Map-reduce partition columns: _col0 (type: int)
-            Statistics: Num rows: 171 Data size: 3819 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE
             value expressions: _col1 (type: bigint)
           TableScan
             Reduce Output Operator
              key expressions: _col0 (type: int)
              sort order: +
              Map-reduce partition columns: _col0 (type: int)
-             Statistics: Num rows: 171 Data size: 3819 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE
              value expressions: _col1 (type: bigint)
       Reduce Operator Tree:
         Join Operator
@@ -115,14 +107,14 @@ STAGE PLANS:
             0 {KEY.reducesinkkey0} {VALUE._col0}
             1 {KEY.reducesinkkey0} {VALUE._col0}
           outputColumnNames: _col0, _col1, _col2, _col3
-          Statistics: Num rows: 188 Data size: 4200 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 376 Data size: 8402 Basic stats: COMPLETE Column stats: NONE
           Select Operator
             expressions: _col0 (type: int), _col2 (type: int), _col1 (type: bigint), _col3 (type: bigint)
             outputColumnNames: _col0, _col1, _col2, _col3
-            Statistics: Num rows: 188 Data size: 4200 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 376 Data size: 8402 Basic stats: COMPLETE Column stats: NONE
             File Output Operator
               compressed: false
-              Statistics: Num rows: 188 Data size: 4200 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 376 Data size: 8402 Basic stats: COMPLETE Column stats: NONE
               table:
                   input format: org.apache.hadoop.mapred.TextInputFormat
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -132,44 +124,36 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
           TableScan
-            alias: x
+            alias: x1
             Statistics: Num rows: 1028 Data size: 22964 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: (c1 < 120) (type: boolean)
+              predicate: (c2 > 100) (type: boolean)
               Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: c1 (type: int)
-                outputColumnNames: c1
+              Group By Operator
+                aggregations: count(1)
+                keys: c2 (type: int)
+                mode: hash
+                outputColumnNames: _col0, _col1
                Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  aggregations: count(1)
-                  keys: c2 (type: int)
-                  mode: hash
-                  outputColumnNames: _col0, _col1
+                Reduce Output Operator
+                  key expressions: _col0 (type: int)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: int)
                  Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: int)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: int)
-                    Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col1 (type: bigint)
+                  value expressions: _col1 (type: bigint)
       Reduce Operator Tree:
        Group By Operator
          aggregations: count(VALUE._col0)
          keys: KEY._col0 (type: int)
          mode: mergepartial
          outputColumnNames: _col0, _col1
-          Statistics: Num rows: 171 Data size: 3819 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: int), _col1 (type: bigint)
-            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 171 Data size: 3819 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

   Stage: Stage-0
     Fetch Operator
@@ -231,49 +215,41 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
          TableScan
-           alias: x1
+           alias: x
            Statistics: Num rows: 1028 Data size: 22964 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
-             predicate: (c2 > 100) (type: boolean)
+             predicate: (c1 < 120) (type: boolean)
              Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE
-             Select Operator
-               expressions: c2 (type: int)
-               outputColumnNames: c2
+             Group By Operator
+               aggregations: count(1)
+               keys: c1 (type: int)
+               mode: hash
+               outputColumnNames: _col0, _col1
               Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE
-               Group By Operator
-                 aggregations: count(1)
-                 keys: c2 (type: int)
-                 mode: hash
-                 outputColumnNames: _col0, _col1
+               Reduce Output Operator
+                 key expressions: _col0 (type: int)
+                 sort order: +
+                 Map-reduce partition columns: _col0 (type: int)
                 Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE
-                 Reduce Output Operator
-                   key expressions: _col0 (type: int)
-                   sort order: +
-                   Map-reduce partition columns: _col0 (type: int)
-                   Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE
-                   value expressions: _col1 (type: bigint)
+                 value expressions: _col1 (type: bigint)
          TableScan
-           alias: x
+           alias: x1
            Statistics: Num rows: 1028 Data size: 22964 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
-             predicate: (c1 < 120) (type: boolean)
+             predicate: (c2 > 100) (type: boolean)
              Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE
-             Select Operator
-               expressions: c1 (type: int)
-               outputColumnNames: c1
+             Group By Operator
+               aggregations: count(1)
+               keys: c2 (type: int)
+               mode: hash
+               outputColumnNames: _col0, _col1
               Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE
-               Group By Operator
-                 aggregations: count(1)
-                 keys: c1 (type: int)
-                 mode: hash
-                 outputColumnNames: _col0, _col1
+               Reduce Output Operator
+                 key expressions: _col0 (type: int)
+                 sort order: +
+                 Map-reduce partition columns: _col0 (type: int)
                 Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE
-                 Reduce Output Operator
-                   key expressions: _col0 (type: int)
-                   sort order: +
-                   Map-reduce partition columns: _col0 (type: int)
-                   Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE
-                   value expressions: _col1 (type: bigint)
+                 value expressions: _col1 (type: bigint)
       Reduce Operator Tree:
        Demux Operator
          Statistics: Num rows: 684 Data size: 15278 Basic stats: COMPLETE Column stats: NONE
@@ -283,62 +259,54 @@ STAGE PLANS:
            mode: mergepartial
            outputColumnNames: _col0, _col1
            Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE
-           Select Operator
-             expressions: _col0 (type: int), _col1 (type: bigint)
-             outputColumnNames: _col0, _col1
-             Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE
-             Mux Operator
-               Statistics: Num rows: 684 Data size: 15278 Basic stats: COMPLETE Column stats: NONE
-               Join Operator
-                 condition map:
-                      Inner Join 0 to 1
-                 condition expressions:
-                   0 {KEY.reducesinkkey0} {VALUE._col0}
-                   1 {KEY.reducesinkkey0} {VALUE._col0}
+           Mux Operator
+             Statistics: Num rows: 684 Data size: 15278 Basic stats: COMPLETE Column stats: NONE
+             Join Operator
+               condition map:
+                    Inner Join 0 to 1
+               condition expressions:
+                 0 {KEY.reducesinkkey0} {VALUE._col0}
+                 1 {KEY.reducesinkkey0} {VALUE._col0}
+               outputColumnNames: _col0, _col1, _col2, _col3
+               Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+               Select Operator
+                 expressions: _col0 (type: int), _col2 (type: int), _col1 (type: bigint), _col3 (type: bigint)
                  outputColumnNames: _col0, _col1, _col2, _col3
                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                 Select Operator
-                   expressions: _col0 (type: int), _col2 (type: int), _col1 (type: bigint), _col3 (type: bigint)
-                   outputColumnNames: _col0, _col1, _col2, _col3
+                 File Output Operator
+                   compressed: false
                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                   File Output Operator
-                     compressed: false
-                     Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                     table:
-                         input format: org.apache.hadoop.mapred.TextInputFormat
-                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                   table:
+                       input format: org.apache.hadoop.mapred.TextInputFormat
+                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
          Group By Operator
            aggregations: count(VALUE._col0)
            keys: KEY._col0 (type: int)
            mode: mergepartial
            outputColumnNames: _col0, _col1
            Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE
-           Select Operator
-             expressions: _col0 (type: int), _col1 (type: bigint)
-             outputColumnNames: _col0, _col1
-             Statistics: Num rows: 342 Data size: 7639 Basic stats: COMPLETE Column stats: NONE
-             Mux Operator
-               Statistics: Num rows: 684 Data size: 15278 Basic stats: COMPLETE Column stats: NONE
-               Join Operator
-                 condition map:
-                      Inner Join 0 to 1
-                 condition expressions:
-                   0 {KEY.reducesinkkey0} {VALUE._col0}
-                   1 {KEY.reducesinkkey0} {VALUE._col0}
+           Mux Operator
+             Statistics: Num rows: 684 Data size: 15278 Basic stats: COMPLETE Column stats: NONE
+             Join Operator
+               condition map:
+                    Inner Join 0 to 1
+               condition expressions:
+                 0 {KEY.reducesinkkey0} {VALUE._col0}
+                 1 {KEY.reducesinkkey0} {VALUE._col0}
+               outputColumnNames: _col0, _col1, _col2, _col3
+               Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+               Select Operator
+                 expressions: _col0 (type: int), _col2 (type: int), _col1 (type: bigint), _col3 (type: bigint)
                  outputColumnNames: _col0, _col1, _col2, _col3
                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                 Select Operator
-                   expressions: _col0 (type: int), _col2 (type: int), _col1 (type: bigint), _col3 (type: bigint)
-                   outputColumnNames: _col0, _col1, _col2, _col3
+                 File Output Operator
+                   compressed: false
                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                   File Output Operator
-                     compressed: false
-                     Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                     table:
-                         input format: org.apache.hadoop.mapred.TextInputFormat
-                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                   table:
+                       input format: org.apache.hadoop.mapred.TextInputFormat
+                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

   Stage: Stage-0
     Fetch Operator
@@ -400,44 +368,36 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
          TableScan
-           alias: x1
+           alias: x
            Statistics: Num rows: 1028 Data size: 22964 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
-             predicate: (((c2 > 100) and c1 is not null) and c3 is not null) (type: boolean)
-             Statistics: Num rows: 86 Data size: 1921 Basic stats: COMPLETE Column stats: NONE
-             Select Operator
-               expressions: c1 (type: int), c3 (type: string)
-               outputColumnNames: c1, c3
-               Statistics: Num rows: 86 Data size: 1921 Basic stats: COMPLETE Column stats: NONE
-               Group By Operator
-                 aggregations: count(1)
-                 keys: c1 (type: int), c3 (type: string)
-                 mode: hash
-                 outputColumnNames: _col0, _col1, _col2
-                 Statistics: Num rows: 86 Data size: 1921 Basic stats: COMPLETE Column stats: NONE
-                 Reduce Output Operator
-                   key expressions: _col0 (type: int), _col1 (type: string)
-                   sort order: ++
-                   Map-reduce partition columns: _col0 (type: int), _col1 (type: string)
-                   Statistics: Num rows: 86 Data size: 1921 Basic stats: COMPLETE Column stats: NONE
-                   value expressions: _col2 (type: bigint)
+             predicate: ((c1 < 120) and c3 is not null) (type: boolean)
+             Statistics: Num rows: 171 Data size: 3819 Basic stats: COMPLETE Column stats: NONE
+             Group By Operator
+               aggregations: count(1)
+               keys: c1 (type: int), c3 (type: string)
+               mode: hash
+               outputColumnNames: _col0, _col1, _col2
+               Statistics: Num rows: 171 Data size: 3819 Basic stats: COMPLETE Column stats: NONE
+               Reduce Output Operator
+                 key expressions: _col0 (type: int), _col1 (type: string)
+                 sort order: ++
+                 Map-reduce partition columns: _col0 (type: int), _col1 (type: string)
+                 Statistics: Num rows: 171 Data size: 3819 Basic stats: COMPLETE Column stats: NONE
+                 value expressions: _col2 (type: bigint)
       Reduce Operator Tree:
        Group By Operator
          aggregations: count(VALUE._col0)
          keys: KEY._col0 (type: int), KEY._col1 (type: string)
          mode: mergepartial
          outputColumnNames: _col0, _col1, _col2
-         Statistics: Num rows: 43 Data size: 960 Basic stats: COMPLETE Column stats: NONE
-         Select Operator
-           expressions: _col0 (type: int), _col1 (type: string), _col2 (type: bigint)
-           outputColumnNames: _col0, _col1, _col2
-           Statistics: Num rows: 43 Data size: 960 Basic stats: COMPLETE Column stats: NONE
-           File Output Operator
-             compressed: false
-             table:
-                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                 serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+         Statistics: Num rows: 171 Data size: 3819 Basic stats: COMPLETE Column stats: NONE
+         File Output Operator
+           compressed: false
+           table:
+               input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+               output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+               serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

   Stage: Stage-2
     Map Reduce
@@ -447,14 +407,14 @@ STAGE PLANS:
            key expressions: _col0 (type: int), _col1 (type: string)
            sort order: ++
            Map-reduce partition columns: _col0 (type: int), _col1 (type: string)
-           Statistics: Num rows: 43 Data size: 960 Basic stats: COMPLETE Column stats: NONE
+           Statistics: Num rows: 171 Data size: 3819 Basic stats: COMPLETE Column stats: NONE
            value expressions: _col2 (type: bigint)
          TableScan
            Reduce Output Operator
             key expressions: _col0 (type: int), _col1 (type: string)
             sort order: ++
             Map-reduce partition columns: _col0 (type: int), _col1 (type: string)
-            Statistics: Num rows: 85 Data size: 1898 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 86 Data size: 1921 Basic stats: COMPLETE Column stats: NONE
             value expressions: _col2 (type: bigint)
       Reduce Operator Tree:
        Join Operator
@@ -464,14 +424,14 @@ STAGE PLANS:
            0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} {VALUE._col0}
            1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} {VALUE._col0}
          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-         Statistics: Num rows: 93 Data size: 2087 Basic stats: COMPLETE Column stats: NONE
+         Statistics: Num rows: 188 Data size: 4200 Basic stats: COMPLETE Column stats: NONE
          Select Operator
            expressions: _col0 (type: int), _col1 (type: string), _col3 (type: int), _col4 (type: string), _col2 (type: bigint), _col5 (type: bigint)
            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-           Statistics: Num rows: 93 Data size: 2087 Basic stats: COMPLETE Column stats: NONE
+           Statistics: Num rows: 188 Data size: 4200 Basic stats: COMPLETE Column stats: NONE
            File Output Operator
              compressed: false
-             Statistics: Num rows: 93 Data size: 2087 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 188 Data size: 4200 Basic stats: COMPLETE Column stats: NONE
              table:
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -481,26 +441,26 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
          TableScan
-           alias: x
+           alias: x1
            Statistics: Num rows: 1028 Data size: 22964 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
-             predicate: ((c1 < 120) and c3 is not null) (type: boolean)
-             Statistics: Num rows: 171 Data size: 3819 Basic stats: COMPLETE Column stats: NONE
+             predicate: (((c2 > 100) and c1 is not null) and c3 is not null) (type: boolean)
+             Statistics: Num rows: 86 Data size: 1921 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: c1 (type: int), c3 (type: string)
                outputColumnNames: c1, c3
-               Statistics: Num rows: 171 Data size: 3819 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 86 Data size: 1921 Basic stats: COMPLETE Column stats: NONE
                Group By Operator
                  aggregations: count(1)
                  keys: c1 (type: int), c3 (type: string)
                  mode: hash
                  outputColumnNames: _col0, _col1, _col2
-                 Statistics: Num rows: 171 Data size: 3819 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 86 Data size: 1921 Basic stats: COMPLETE Column stats: NONE
                  Reduce Output Operator
                    key expressions: _col0 (type: int), _col1 (type: string)
                    sort order: ++
                    Map-reduce partition columns: _col0 (type: int), _col1 (type: string)
-                   Statistics: Num rows: 171 Data size: 3819 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 86 Data size: 1921 Basic stats: COMPLETE Column stats: NONE
                    value expressions: _col2 (type: bigint)
       Reduce Operator Tree:
        Group By Operator
@@ -508,17 +468,13 @@ STAGE PLANS:
          keys: KEY._col0 (type: int), KEY._col1 (type: string)
          mode: mergepartial
          outputColumnNames: _col0, _col1, _col2
-         Statistics: Num rows: 85 Data size: 1898 Basic stats: COMPLETE Column stats: NONE
-         Select Operator
-           expressions: _col0 (type: int), _col1 (type: string), _col2 (type: bigint)
-           outputColumnNames: _col0, _col1, _col2
-           Statistics: Num rows: 85 Data size: 1898 Basic stats: COMPLETE Column stats: NONE
-           File Output Operator
-             compressed: false
-             table:
-                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                 serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+         Statistics: Num rows: 86 Data size: 1921 Basic stats: COMPLETE Column stats: NONE
+         File Output Operator
+           compressed: false
+           table:
+               input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+               output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+               serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

   Stage: Stage-0
     Fetch Operator
@@ -578,6 +534,24 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
          TableScan
+           alias: x
+           Statistics: Num rows: 1028 Data size: 22964 Basic stats: COMPLETE Column stats: NONE
+           Filter Operator
+             predicate: ((c1 < 120) and c3 is not null) (type: boolean)
+             Statistics: Num rows: 171 Data size: 3819 Basic stats: COMPLETE Column stats: NONE
+             Group By Operator
+               aggregations: count(1)
+               keys: c1 (type: int), c3 (type: string)
+               mode: hash
+               outputColumnNames: _col0, _col1, _col2
+               Statistics: Num rows: 171 Data size: 3819 Basic stats: COMPLETE Column stats: NONE
+               Reduce Output Operator
+                 key expressions: _col0 (type: int), _col1 (type: string)
+                 sort order: ++
+                 Map-reduce partition columns: _col0 (type: int), _col1 (type: string)
+                 Statistics: Num rows: 171 Data size: 3819 Basic stats: COMPLETE Column stats: NONE
+                 value expressions: _col2 (type: bigint)
+         TableScan
            alias: x1
            Statistics: Num rows: 1028 Data size: 22964 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
@@ -599,28 +573,6 @@ STAGE PLANS:
                    Map-reduce partition columns: _col0 (type: int), _col1 (type: string)
                    Statistics: Num rows: 86 Data size: 1921 Basic stats: COMPLETE Column stats: NONE
                    value expressions: _col2 (type: bigint)
-         TableScan
-           alias: x
-           Statistics: Num rows: 1028 Data size: 22964 Basic stats: COMPLETE Column stats: NONE
-           Filter Operator
-             predicate: ((c1 < 120) and c3 is not null) (type: boolean)
-             Statistics: Num rows: 171 Data size: 3819 Basic stats: COMPLETE Column stats: NONE
-             Select Operator
-               expressions: c1 (type: int), c3 (type: string)
-               outputColumnNames: c1, c3
-               Statistics: Num rows: 171 Data size: 3819 Basic stats: COMPLETE Column stats: NONE
-               Group By Operator
-                 aggregations: count(1)
-                 keys: c1 (type: int), c3 (type: string)
-                 mode: hash
-                 outputColumnNames: _col0, _col1, _col2
-                 Statistics: Num rows: 171 Data size: 3819 Basic stats: COMPLETE Column stats: NONE
-                 Reduce Output Operator
-                   key expressions: _col0 (type: int), _col1 (type: string)
-                   sort order: ++
-                   Map-reduce partition columns: _col0 (type: int), _col1 (type: string)
-                   Statistics: Num rows: 171 Data size: 3819 Basic stats: COMPLETE Column stats: NONE
-                   value expressions: _col2 (type: bigint)
       Reduce Operator Tree:
        Demux Operator
          Statistics: Num rows: 257 Data size: 5740 Basic stats: COMPLETE Column stats: NONE
@@ -630,62 +582,54 @@ STAGE PLANS:
            mode: mergepartial
            outputColumnNames: _col0, _col1, _col2
            Statistics: Num rows: 128 Data size: 2858 Basic stats: COMPLETE Column stats: NONE
-           Select Operator
-             expressions: _col0 (type: int), _col1 (type: string), _col2 (type: bigint)
-             outputColumnNames: _col0, _col1, _col2
-             Statistics: Num rows: 128 Data size: 2858 Basic stats: COMPLETE Column stats: NONE
-             Mux Operator
-               Statistics: Num rows: 256 Data size: 5716 Basic stats: COMPLETE Column stats: NONE
-               Join Operator
-                 condition map:
-                      Inner Join 0 to 1
-                 condition expressions:
-                   0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} {VALUE._col0}
-                   1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} {VALUE._col0}
+           Mux Operator
+             Statistics: Num rows: 256 Data size: 5716 Basic stats: COMPLETE Column stats: NONE
+             Join Operator
+               condition map:
+                    Inner Join 0 to 1
+               condition expressions:
+                 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} {VALUE._col0}
+                 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} {VALUE._col0}
+               outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+               Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+               Select Operator
+                 expressions: _col0 (type: int), _col1 (type: string), _col3 (type: int), _col4 (type: string), _col2 (type: bigint), _col5 (type: bigint)
                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                 Select Operator
-                   expressions: _col0 (type: int), _col1 (type: string), _col3 (type: int), _col4 (type: string), _col2 (type: bigint), _col5 (type: bigint)
-                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                 File Output Operator
+                   compressed: false
                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                   File Output Operator
-                     compressed: false
-                     Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                     table:
-                         input format: org.apache.hadoop.mapred.TextInputFormat
-                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                   table:
+                       input format: org.apache.hadoop.mapred.TextInputFormat
+                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
          Group By Operator
            aggregations: count(VALUE._col0)
            keys: KEY._col0 (type: int), KEY._col1 (type: string)
            mode: mergepartial
            outputColumnNames: _col0, _col1, _col2
            Statistics: Num rows: 128 Data size: 2858 Basic stats: COMPLETE Column stats: NONE
-           Select Operator
-             expressions: _col0 (type: int), _col1 (type: string), _col2 (type: bigint)
-             outputColumnNames: _col0, _col1, _col2
-             Statistics: Num rows: 128 Data size: 2858 Basic stats: COMPLETE Column stats: NONE
-             Mux Operator
-               Statistics: Num rows: 256 Data size: 5716 Basic stats: COMPLETE Column stats: NONE
-               Join Operator
-                 condition map:
-                      Inner Join 0 to 1
-                 condition expressions:
-                   0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} {VALUE._col0}
-                   1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} {VALUE._col0}
+           Mux Operator
+             Statistics: Num rows: 256 Data size: 5716 Basic stats: COMPLETE Column stats: NONE
+             Join Operator
+               condition map:
+                    Inner Join 0 to 1
+               condition expressions:
+                 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} {VALUE._col0}
+                 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} {VALUE._col0}
+               outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+               Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+               Select Operator
+                 expressions: _col0 (type: int), _col1 (type: string), _col3 (type: int), _col4 (type: string), _col2 (type: bigint), _col5 (type: bigint)
                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                 Select Operator
-                   expressions: _col0 (type: int), _col1 (type: string), _col3 (type: int), _col4 (type: string), _col2 (type: bigint), _col5 (type: bigint)
-                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                 File Output Operator
+                   compressed: false
                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                   File Output Operator
-                     compressed: false
-                     Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                     table:
-                         input format: org.apache.hadoop.mapred.TextInputFormat
-                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                   table:
+                       input format: org.apache.hadoop.mapred.TextInputFormat
+                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
   Stage: Stage-0
     Fetch Operator

diff --git a/ql/src/test/results/clientpositive/cross_join.q.out b/ql/src/test/results/clientpositive/cross_join.q.out
index 654e105..6a657e9 100644
--- a/ql/src/test/results/clientpositive/cross_join.q.out
+++ b/ql/src/test/results/clientpositive/cross_join.q.out
@@ -14,18 +14,18 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
           TableScan
-            alias: src2
-            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
-            Reduce Output Operator
-              sort order:
-              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
-          TableScan
            alias: src
            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
            Reduce Output Operator
              sort order:
              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
              value expressions: key (type: string)
+          TableScan
+            alias: src2
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
       Reduce Operator Tree:
         Join Operator
          condition map:
@@ -69,18 +69,18 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
           TableScan
-            alias: src2
-            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
-            Reduce Output Operator
-              sort order:
-              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
-          TableScan
            alias: src
            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
            Reduce Output Operator
              sort order:
              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
              value expressions: key (type: string)
+          TableScan
+            alias: src2
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
       Reduce Operator Tree:
         Join Operator
          condition map:
@@ -123,7 +123,7 @@
     Map Reduce
       Map Operator Tree:
           TableScan
-            alias: src2
+            alias: src
            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
              predicate: key is not null (type: boolean)
@@ -134,7 +134,7 @@
              Map-reduce partition columns: key (type: string)
              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
           TableScan
-            alias: src
+            alias: src2
            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
              predicate: key is not null (type: boolean)
diff --git a/ql/src/test/results/clientpositive/cross_product_check_1.q.out b/ql/src/test/results/clientpositive/cross_product_check_1.q.out
index 87e356d..f56f6ea 100644
--- a/ql/src/test/results/clientpositive/cross_product_check_1.q.out
+++ b/ql/src/test/results/clientpositive/cross_product_check_1.q.out
@@ -38,18 +38,18 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
           TableScan
-            alias: b
-            Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+            alias: a
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
            Reduce Output Operator
              sort order:
-             Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
              value expressions: key (type: string), value (type: string)
           TableScan
-            alias: a
-            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            alias: b
+            Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
            Reduce Output Operator
              sort order:
-             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
              value expressions: key (type: string), value (type: string)
       Reduce Operator Tree:
         Join Operator
@@ -226,21 +226,17 @@ STAGE PLANS:
              1
            outputColumnNames: _col0
            Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
-           Select Operator
-             expressions: _col0 (type: string)
+           Group By Operator
+             keys: _col0 (type: string)
+             mode: hash
              outputColumnNames: _col0
              Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
-             Group By Operator
-               keys: _col0 (type: string)
-               mode: hash
-               outputColumnNames: _col0
-               Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
-               File Output Operator
-                 compressed: false
-                 table:
-                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                     serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+             File Output Operator
+               compressed: false
+               table:
+                   input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                   output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                   serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

   Stage: Stage-2
     Map Reduce
@@ -256,17 +252,13 @@ STAGE PLANS:
          keys: KEY._col0 (type: string)
          mode: mergepartial
          outputColumnNames: _col0
-         Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE
-         Select Operator
-           expressions: _col0 (type: string)
-           outputColumnNames: _col0
-           Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE
-           File Output Operator
-             compressed: false
-             table:
-                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                 serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+         Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
+         File Output Operator
+           compressed: false
+           table:
+               input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+               output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+               serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

   Stage: Stage-3
     Map Reduce
@@ -274,7 +266,7 @@ STAGE PLANS:
          TableScan
            Reduce Output Operator
              sort order:
-             Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
              value expressions: _col0 (type: string)
          TableScan
            alias: a
@@ -348,21 +340,17 @@ STAGE PLANS:
              1
            outputColumnNames: _col0
            Statistics: Num rows: 11 Data size: 114 Basic stats: COMPLETE Column stats: NONE
-           Select Operator
-             expressions: _col0 (type: string)
+           Group By Operator
+             keys: _col0 (type: string)
+             mode: hash
              outputColumnNames: _col0
              Statistics: Num rows: 11 Data size: 114 Basic stats: COMPLETE Column stats: NONE
-             Group By Operator
-               keys: _col0 (type: string)
-               mode: hash
-               outputColumnNames: _col0
-               Statistics: Num rows: 11 Data size: 114 Basic stats: COMPLETE Column stats: NONE
-               File Output Operator
-                 compressed: false
-                 table:
-                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                     serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+             File Output Operator
+               compressed: false
+               table:
+                   input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                   output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                   serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

   Stage: Stage-2
     Map Reduce
@@ -378,17 +366,13 @@ STAGE PLANS:
          keys: KEY._col0 (type: string)
          mode: mergepartial
          outputColumnNames: _col0
-         Statistics: Num rows: 5 Data size: 51 Basic stats: COMPLETE Column stats: NONE
-         Select Operator
-           expressions: _col0 (type: string)
-           outputColumnNames: _col0
-           Statistics: Num rows: 5 Data size: 51 Basic stats: COMPLETE Column stats: NONE
-           File Output Operator
-             compressed: false
-             table:
-                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                 serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+         Statistics: Num rows: 11 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+         File Output Operator
+           compressed: false
+           table:
+               input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+               output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+               serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

   Stage: Stage-3
     Map Reduce
@@ -396,7 +380,7 @@ STAGE PLANS:
          TableScan
            Reduce Output Operator
              sort order:
-             Statistics: Num rows: 5 Data size: 51 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 11 Data size: 114 Basic stats: COMPLETE Column stats: NONE
              value expressions: _col0 (type: string)
          TableScan
            alias: a
@@ -432,7 +416,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink

-Warning: Shuffle Join JOIN[21][tables = [ss, od1]] in Stage 'Stage-3:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[21][tables = [ss, od1]] in Stage 'Stage-2:MAPRED' is a cross product
 PREHOOK: query: explain select * from
 (select A.key from A group by key) ss join
 (select d1.key from B d1 join B d2 on d1.key = d2.key where 1 = 1 group by d1.key) od1
@@ -443,16 +427,83 @@ POSTHOOK: query: explain select * from
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
-  Stage-2 depends on stages: Stage-1
-  Stage-3 depends on stages: Stage-2, Stage-5
-  Stage-5 is a root stage
-  Stage-0 depends on stages: Stage-3
+  Stage-2 depends on stages: Stage-1, Stage-4
+  Stage-3 is a root stage
+  Stage-4 depends on stages: Stage-3
+  Stage-0 depends on stages: Stage-2

 STAGE PLANS:
   Stage: Stage-1
     Map Reduce
       Map Operator Tree:
          TableScan
+           alias: a
+           Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+           Select Operator
+             expressions: key (type: string)
+             outputColumnNames: key
+             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+             Group By Operator
+               keys: key (type: string)
+               mode: hash
+               outputColumnNames: _col0
+               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+               Reduce Output Operator
+                 key expressions: _col0 (type: string)
+                 sort order: +
+                 Map-reduce partition columns: _col0 (type: string)
+                 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: string)
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: string)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          condition expressions:
+            0 {VALUE._col0}
+            1 {VALUE._col0}
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: string)
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
            alias: d1
            Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
@@ -483,41 +534,11 @@
              1
            outputColumnNames: _col0
            Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
-           Select Operator
-             expressions: _col0 (type: string)
+           Group By Operator
+             keys: _col0 (type: string)
+             mode: hash
              outputColumnNames: _col0
              Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
-             Group By Operator
-               keys: _col0 (type: string)
-               mode: hash
-               outputColumnNames: _col0
-               Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
-               File Output Operator
-                 compressed: false
-                 table:
-                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                     serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
-  Stage: Stage-2
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            Reduce Output Operator
-              key expressions: _col0 (type: string)
-              sort order: +
-              Map-reduce partition columns: _col0 (type: string)
-              Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
-      Reduce Operator Tree:
-        Group By Operator
-          keys: KEY._col0 (type: string)
-          mode: mergepartial
-          outputColumnNames: _col0
-          Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: string)
-            outputColumnNames: _col0
-            Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
             table:
@@ -525,76 +546,27 @@
                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

-  Stage: Stage-3
+  Stage: Stage-4
     Map Reduce
       Map Operator Tree:
          TableScan
            Reduce Output Operator
-             sort order:
-             Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE
-             value expressions: _col0 (type: string)
-         TableScan
-           Reduce Output Operator
-             sort order:
-             Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-             value expressions: _col0 (type: string)
-      Reduce Operator Tree:
-        Join Operator
-          condition map:
-               Inner Join 0 to 1
-          condition expressions:
-            0 {VALUE._col0}
-            1 {VALUE._col0}
-          outputColumnNames: _col0, _col1
-          Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: string), _col1 (type: string)
-            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-              table:
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-5
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: a
-            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: key (type: string)
-              outputColumnNames: key
-              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-              Group By Operator
-                keys: key (type: string)
-                mode: hash
-                outputColumnNames: _col0
-                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+             key expressions: _col0 (type: string)
+             sort order: +
+             Map-reduce partition columns: _col0 (type: string)
+             Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
      Reduce Operator Tree:
        Group By Operator
          keys: KEY._col0 (type: string)
          mode: mergepartial
          outputColumnNames: _col0
-         Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-         Select Operator
-           expressions: _col0 (type: string)
-           outputColumnNames: _col0
-           Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-           File Output Operator
-             compressed: false
-             table:
-                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                 serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+         Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
+         File Output Operator
+           compressed: false
+           table:
+               input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+               output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+               serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

   Stage: Stage-0
     Fetch Operator

diff --git a/ql/src/test/results/clientpositive/cross_product_check_2.q.out b/ql/src/test/results/clientpositive/cross_product_check_2.q.out
index 7cf3e9c..fa5c784 100644
--- a/ql/src/test/results/clientpositive/cross_product_check_2.q.out
+++ b/ql/src/test/results/clientpositive/cross_product_check_2.q.out
@@ -250,20 +250,16 @@ STAGE PLANS:
                1 key (type: string)
              outputColumnNames: _col0
              Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
-             Select Operator
-               expressions: _col0 (type: string)
+             Group By Operator
+               keys: _col0 (type: string)
+               mode: hash
                outputColumnNames: _col0
                Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
-               Group By Operator
-                 keys: _col0 (type: string)
-                 mode: hash
-                 outputColumnNames: _col0
+               Reduce Output Operator
+                 key expressions: _col0 (type: string)
+                 sort order: +
+                 Map-reduce partition columns: _col0 (type: string)
                 Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
-                 Reduce Output Operator
-                   key expressions: _col0 (type: string)
-                   sort order: +
-                   Map-reduce partition columns: _col0 (type: string)
-                   Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
      Local Work:
        Map Reduce Local Work
      Reduce Operator Tree:
@@ -271,17 +267,13 @@ STAGE PLANS:
          keys: KEY._col0 (type: string)
          mode: mergepartial
          outputColumnNames: _col0
-         Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE
-         Select Operator
-           expressions: _col0 (type: string)
-           outputColumnNames: _col0
-           Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE
-           File Output Operator
-             compressed: false
-             table:
-                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                 serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+         Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
+         File Output Operator
+           compressed: false
+           table:
+               input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+               output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+               serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

   Stage: Stage-8
     Map Reduce Local Work
@@ -387,20 +379,16 @@ STAGE PLANS:
              1
            outputColumnNames: _col0
            Statistics: Num rows: 11 Data size: 114 Basic stats: COMPLETE Column stats: NONE
-           Select Operator
-             expressions: _col0 (type: string)
+           Group By Operator
+             keys: _col0 (type: string)
+             mode: hash
              outputColumnNames: _col0
              Statistics: Num rows: 11 Data size: 114 Basic stats: COMPLETE Column stats: NONE
-             Group By Operator
-               keys: _col0 (type: string)
-               mode: hash
-               outputColumnNames: _col0
+             Reduce Output Operator
+               key expressions: _col0 (type: string)
+               sort order: +
+               Map-reduce partition columns: _col0 (type: string)
               Statistics: Num rows: 11 Data size: 114 Basic stats: COMPLETE Column stats: NONE
-               Reduce Output Operator
-                 key expressions: _col0 (type: string)
-                 sort order: +
-                 Map-reduce partition columns: _col0 (type: string)
-                 Statistics: Num rows: 11 Data size: 114 Basic stats: COMPLETE Column stats: NONE
      Local Work:
        Map Reduce Local Work
      Reduce Operator Tree:
@@ -408,17 +396,13 @@ STAGE PLANS:
          keys: KEY._col0 (type: string)
          mode: mergepartial
          outputColumnNames: _col0
-         Statistics: Num rows: 5 Data size: 51 Basic stats: COMPLETE Column stats: NONE
-         Select Operator
-           expressions: _col0 (type: string)
-           outputColumnNames: _col0
-           Statistics: Num rows: 5 Data size: 51 Basic stats: COMPLETE Column stats: NONE
-           File Output Operator
-             compressed: false
-             table:
-                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                 serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+         Statistics: Num rows: 11 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+         File Output Operator
+           compressed: false
+           table:
+               input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+               output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+               serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

   Stage: Stage-8
     Map Reduce Local Work
@@ -476,7 +460,7 @@ STAGE PLANS:

 Warning: Map Join MAPJOIN[97][bigTable=?] in task 'Stage-7:MAPRED' is a cross product
 Warning: Map Join MAPJOIN[64][bigTable=?] in task 'Stage-6:MAPRED' is a cross product
-Warning: Shuffle Join JOIN[21][tables = [ss, od1]] in Stage 'Stage-3:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[21][tables = [ss, od1]] in Stage 'Stage-2:MAPRED' is a cross product
 PREHOOK: query: explain select * from
 (select A.key from A group by key) ss join
 (select d1.key from B d1 join B d2 on d1.key = d2.key where 1 = 1 group by d1.key) od1
@@ -486,19 +470,19 @@ POSTHOOK: query: explain select * from
 (select d1.key from B d1 join B d2 on d1.key = d2.key where 1 = 1 group by d1.key) od1
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-5 is a root stage
-  Stage-8 depends on stages: Stage-2, Stage-5 , consists of Stage-10, Stage-11, Stage-3
-  Stage-10 has a backup stage: Stage-3
+  Stage-1 is a root stage
+  Stage-8 depends on stages: Stage-1, Stage-4 , consists of Stage-10, Stage-11, Stage-2
+  Stage-10 has a backup stage: Stage-2
   Stage-6 depends on stages: Stage-10
-  Stage-11 has a backup stage: Stage-3
+  Stage-11 has a backup stage: Stage-2
   Stage-7 depends on stages: Stage-11
-  Stage-3
+  Stage-2
   Stage-12 is a root stage
-  Stage-2 depends on stages: Stage-12
-  Stage-0 depends on stages: Stage-6, Stage-7, Stage-3
+  Stage-4 depends on stages: Stage-12
+  Stage-0 depends on stages: Stage-6, Stage-7, Stage-2

 STAGE PLANS:
-  Stage: Stage-5
+  Stage: Stage-1
     Map Reduce
       Map Operator Tree:
          TableScan
@@ -523,17 +507,13 @@ STAGE PLANS:
          keys: KEY._col0 (type: string)
          mode: mergepartial
          outputColumnNames: _col0
-         Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-         Select Operator
-           expressions: _col0 (type: string)
-           outputColumnNames: _col0
-           Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-           File Output Operator
-             compressed: false
-             table:
-                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                 serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+         Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+         File Output Operator
+           compressed: false
+           table:
+               input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+               output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+               serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

   Stage: Stage-8
     Conditional Operator
@@ -541,11 +521,11 @@ STAGE PLANS:
   Stage: Stage-10
     Map Reduce Local Work
       Alias -> Map Local Tables:
-        $INTNAME
+        $INTNAME1
          Fetch Operator
            limit: -1
       Alias -> Map Local Operator Tree:
-        $INTNAME
+        $INTNAME1
          TableScan
            HashTable Sink Operator
              condition expressions:
@@ -584,11 +564,11 @@ STAGE PLANS:
   Stage: Stage-11
     Map Reduce Local Work
       Alias -> Map Local Tables:
-        $INTNAME1
+        $INTNAME
          Fetch Operator
            limit: -1
       Alias -> Map Local Operator Tree:
-        $INTNAME1
+        $INTNAME
          TableScan
            HashTable Sink Operator
              condition expressions:
@@ -624,18 +604,18 @@ STAGE PLANS:
      Local Work:
        Map Reduce Local Work

-  Stage: Stage-3
+  Stage: Stage-2
     Map Reduce
       Map Operator Tree:
          TableScan
            Reduce Output Operator
              sort order:
-             Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
              value expressions: _col0 (type: string)
          TableScan
            Reduce Output Operator
              sort order:
-             Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
              value expressions: _col0 (type: string)
      Reduce Operator Tree:
        Join Operator
@@ -645,14 +625,14 @@ STAGE PLANS:
            0 {VALUE._col0}
            1 {VALUE._col0}
          outputColumnNames: _col0, _col1
-         Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+         Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
          Select Operator
            expressions: _col0 (type: string), _col1 (type: string)
            outputColumnNames: _col0, _col1
-           Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+           Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
            File Output Operator
              compressed: false
-             Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
              table:
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -680,7 +660,7 @@ STAGE PLANS:
                0 key (type: string)
                1 key (type: string)

-  Stage: Stage-2
+  Stage: Stage-4
     Map Reduce
       Map Operator Tree:
          TableScan
@@ -700,20 +680,16 @@ STAGE PLANS:
                1 key (type: string)
              outputColumnNames: _col0
              Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
-             Select Operator
-               expressions: _col0 (type: string)
+             Group By Operator
+               keys: _col0 (type: string)
+               mode: hash
                outputColumnNames: _col0
                Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
-               Group By Operator
-                 keys: _col0 (type: string)
-                 mode: hash
-                 outputColumnNames: _col0
+               Reduce Output Operator
+                 key expressions: _col0 (type: string)
+                 sort order: +
+                 Map-reduce partition columns: _col0 (type: string)
                 Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
-                 Reduce Output Operator
-                   key expressions: _col0 (type: string)
-                   sort order: +
-                   Map-reduce partition columns: _col0 (type: string)
-                   Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
      Local Work:
        Map Reduce Local Work
      Reduce Operator Tree:
@@ -721,17 +697,13 @@ STAGE PLANS:
          keys: KEY._col0 (type: string)
          mode: mergepartial
          outputColumnNames: _col0
-         Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE
-         Select Operator
-           expressions: _col0 (type: string)
-           outputColumnNames: _col0
-           Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE
-           File Output Operator
-             compressed: false
-             table:
-                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                 serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+         Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
+         File Output Operator
+           compressed: false
+           table:
+               input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+               output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+               serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

   Stage: Stage-0
     Fetch Operator

diff --git a/ql/src/test/results/clientpositive/ctas.q.out b/ql/src/test/results/clientpositive/ctas.q.out
index c1e2dcc..f959237 100644
--- a/ql/src/test/results/clientpositive/ctas.q.out
+++ b/ql/src/test/results/clientpositive/ctas.q.out
@@ -1,10 +1,10 @@
-PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S)
+PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S)
 create table nzhang_Tmp(a int, b string)
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
 PREHOOK: Output: default@nzhang_Tmp
0.20S) +POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) create table nzhang_Tmp(a int, b string) POSTHOOK: type: CREATETABLE diff --git a/ql/src/test/results/clientpositive/ctas_colname.q.out b/ql/src/test/results/clientpositive/ctas_colname.q.out index 252aa0f..24e5904 100644 --- a/ql/src/test/results/clientpositive/ctas_colname.q.out +++ b/ql/src/test/results/clientpositive/ctas_colname.q.out @@ -1094,22 +1094,18 @@ STAGE PLANS: Filter Operator predicate: (key < 9) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Group By Operator + aggregations: count(value) + keys: key (type: string), value (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(value) - keys: key (type: string), value (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: bigint) + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1230,22 +1226,18 @@ STAGE PLANS: Filter Operator predicate: (key < 9) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Group By Operator + aggregations: max(value) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: max(value) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + value expressions: _col1 (type: string) Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/decimal_join2.q.out b/ql/src/test/results/clientpositive/decimal_join2.q.out index f9c33b7..7c1ac56 100644 --- a/ql/src/test/results/clientpositive/decimal_join2.q.out +++ b/ql/src/test/results/clientpositive/decimal_join2.q.out @@ -54,7 +54,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -66,7 +66,7 @@ STAGE PLANS: Statistics: Num rows: 19 Data size: 2148 Basic stats: 
COMPLETE Column stats: NONE value expressions: value (type: int) TableScan - alias: a + alias: b Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) diff --git a/ql/src/test/results/clientpositive/describe_database.q.out b/ql/src/test/results/clientpositive/describe_database.q.out index 45e5a42..f2f0533 100644 --- a/ql/src/test/results/clientpositive/describe_database.q.out +++ b/ql/src/test/results/clientpositive/describe_database.q.out @@ -9,6 +9,11 @@ PREHOOK: type: DESCDATABASE POSTHOOK: query: desc database extended test_db POSTHOOK: type: DESCDATABASE test_db location/in/test hive_test_user USER {key2=value2, key1=value1} +PREHOOK: query: desc schema extended test_db +PREHOOK: type: DESCDATABASE +POSTHOOK: query: desc schema extended test_db +POSTHOOK: type: DESCDATABASE +test_db location/in/test hive_test_user USER {key2=value2, key1=value1} PREHOOK: query: drop database test_db PREHOOK: type: DROPDATABASE PREHOOK: Input: database:test_db diff --git a/ql/src/test/results/clientpositive/describe_database_json.q.out b/ql/src/test/results/clientpositive/describe_database_json.q.out index 3fa0ca9..bedcae1 100644 --- a/ql/src/test/results/clientpositive/describe_database_json.q.out +++ b/ql/src/test/results/clientpositive/describe_database_json.q.out @@ -15,6 +15,16 @@ PREHOOK: type: DESCDATABASE POSTHOOK: query: DESCRIBE DATABASE EXTENDED jsondb1 POSTHOOK: type: DESCDATABASE #### A masked pattern was here #### +PREHOOK: query: DESCRIBE SCHEMA jsondb1 +PREHOOK: type: DESCDATABASE +POSTHOOK: query: DESCRIBE SCHEMA jsondb1 +POSTHOOK: type: DESCDATABASE +#### A masked pattern was here #### +PREHOOK: query: DESCRIBE SCHEMA EXTENDED jsondb1 +PREHOOK: type: DESCDATABASE +POSTHOOK: query: DESCRIBE SCHEMA EXTENDED jsondb1 +POSTHOOK: type: DESCDATABASE +#### A masked pattern was here #### PREHOOK: query: SHOW DATABASES PREHOOK: type: SHOWDATABASES POSTHOOK: query: SHOW DATABASES diff --git a/ql/src/test/results/clientpositive/drop_partition_with_stats.q.out b/ql/src/test/results/clientpositive/drop_partition_with_stats.q.out new file mode 100644 index 0000000..e27e557 --- /dev/null +++ b/ql/src/test/results/clientpositive/drop_partition_with_stats.q.out @@ -0,0 +1,496 @@ +PREHOOK: query: -- This test verifies that a table partition could be dropped with columns stats computed +-- The column stats for a partitioned table will go to PART_COL_STATS +CREATE DATABASE IF NOT EXISTS partstatsdb1 +PREHOOK: type: CREATEDATABASE +PREHOOK: Output: database:partstatsdb1 +POSTHOOK: query: -- This test verifies that a table partition could be dropped with columns stats computed +-- The column stats for a partitioned table will go to PART_COL_STATS +CREATE DATABASE IF NOT EXISTS partstatsdb1 +POSTHOOK: type: CREATEDATABASE +POSTHOOK: Output: database:partstatsdb1 +PREHOOK: query: USE partstatsdb1 +PREHOOK: type: SWITCHDATABASE +PREHOOK: Input: database:partstatsdb1 +POSTHOOK: query: USE partstatsdb1 +POSTHOOK: type: SWITCHDATABASE +POSTHOOK: Input: database:partstatsdb1 +PREHOOK: query: CREATE TABLE IF NOT EXISTS testtable (key STRING, value STRING) PARTITIONED BY (part1 STRING, Part2 STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:partstatsdb1 +PREHOOK: Output: partstatsdb1@testtable +POSTHOOK: query: CREATE TABLE IF NOT EXISTS testtable (key STRING, value STRING) PARTITIONED BY (part1 STRING, Part2 STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:partstatsdb1 +POSTHOOK: Output: 
partstatsdb1@testtable +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE testtable PARTITION (part1='p11', Part2='P12') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: partstatsdb1@testtable +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE testtable PARTITION (part1='p11', Part2='P12') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: partstatsdb1@testtable +POSTHOOK: Output: partstatsdb1@testtable@part1=p11/part2=P12 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE testtable PARTITION (part1='p21', Part2='P22') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: partstatsdb1@testtable +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE testtable PARTITION (part1='p21', Part2='P22') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: partstatsdb1@testtable +POSTHOOK: Output: partstatsdb1@testtable@part1=p21/part2=P22 +PREHOOK: query: ANALYZE TABLE testtable COMPUTE STATISTICS FOR COLUMNS key +PREHOOK: type: QUERY +PREHOOK: Input: partstatsdb1@testtable +PREHOOK: Input: partstatsdb1@testtable@part1=p11/part2=P12 +PREHOOK: Input: partstatsdb1@testtable@part1=p21/part2=P22 +#### A masked pattern was here #### +POSTHOOK: query: ANALYZE TABLE testtable COMPUTE STATISTICS FOR COLUMNS key +POSTHOOK: type: QUERY +POSTHOOK: Input: partstatsdb1@testtable +POSTHOOK: Input: partstatsdb1@testtable@part1=p11/part2=P12 +POSTHOOK: Input: partstatsdb1@testtable@part1=p21/part2=P22 +#### A masked pattern was here #### +PREHOOK: query: ANALYZE TABLE testtable PARTITION (part1='p11', Part2='P12') COMPUTE STATISTICS FOR COLUMNS key +PREHOOK: type: QUERY +PREHOOK: Input: partstatsdb1@testtable +PREHOOK: Input: partstatsdb1@testtable@part1=p11/part2=P12 +#### A masked pattern was here #### +POSTHOOK: query: ANALYZE TABLE testtable PARTITION (part1='p11', Part2='P12') COMPUTE STATISTICS FOR COLUMNS key +POSTHOOK: type: QUERY +POSTHOOK: Input: partstatsdb1@testtable +POSTHOOK: Input: partstatsdb1@testtable@part1=p11/part2=P12 +#### A masked pattern was here #### +PREHOOK: query: CREATE TABLE IF NOT EXISTS TestTable1 (key STRING, value STRING) PARTITIONED BY (part1 STRING, Part2 STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:partstatsdb1 +PREHOOK: Output: partstatsdb1@TestTable1 +POSTHOOK: query: CREATE TABLE IF NOT EXISTS TestTable1 (key STRING, value STRING) PARTITIONED BY (part1 STRING, Part2 STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:partstatsdb1 +POSTHOOK: Output: partstatsdb1@TestTable1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE TestTable1 PARTITION (part1='p11', Part2='P11') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: partstatsdb1@testtable1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE TestTable1 PARTITION (part1='p11', Part2='P11') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: partstatsdb1@testtable1 +POSTHOOK: Output: partstatsdb1@testtable1@part1=p11/part2=P11 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE TestTable1 PARTITION (part1='p11', Part2='P12') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: partstatsdb1@testtable1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' 
OVERWRITE INTO TABLE TestTable1 PARTITION (part1='p11', Part2='P12') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: partstatsdb1@testtable1 +POSTHOOK: Output: partstatsdb1@testtable1@part1=p11/part2=P12 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE TestTable1 PARTITION (part1='p21', Part2='P22') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: partstatsdb1@testtable1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE TestTable1 PARTITION (part1='p21', Part2='P22') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: partstatsdb1@testtable1 +POSTHOOK: Output: partstatsdb1@testtable1@part1=p21/part2=P22 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE TestTable1 PARTITION (part1='p31', Part2='P32') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: partstatsdb1@testtable1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE TestTable1 PARTITION (part1='p31', Part2='P32') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: partstatsdb1@testtable1 +POSTHOOK: Output: partstatsdb1@testtable1@part1=p31/part2=P32 +PREHOOK: query: ANALYZE TABLE TestTable1 COMPUTE STATISTICS FOR COLUMNS key +PREHOOK: type: QUERY +PREHOOK: Input: partstatsdb1@testtable1 +PREHOOK: Input: partstatsdb1@testtable1@part1=p11/part2=P11 +PREHOOK: Input: partstatsdb1@testtable1@part1=p11/part2=P12 +PREHOOK: Input: partstatsdb1@testtable1@part1=p21/part2=P22 +PREHOOK: Input: partstatsdb1@testtable1@part1=p31/part2=P32 +#### A masked pattern was here #### +POSTHOOK: query: ANALYZE TABLE TestTable1 COMPUTE STATISTICS FOR COLUMNS key +POSTHOOK: type: QUERY +POSTHOOK: Input: partstatsdb1@testtable1 +POSTHOOK: Input: partstatsdb1@testtable1@part1=p11/part2=P11 +POSTHOOK: Input: partstatsdb1@testtable1@part1=p11/part2=P12 +POSTHOOK: Input: partstatsdb1@testtable1@part1=p21/part2=P22 +POSTHOOK: Input: partstatsdb1@testtable1@part1=p31/part2=P32 +#### A masked pattern was here #### +PREHOOK: query: ANALYZE TABLE TestTable1 PARTITION (part1='p11') COMPUTE STATISTICS FOR COLUMNS key +PREHOOK: type: QUERY +PREHOOK: Input: partstatsdb1@testtable1 +PREHOOK: Input: partstatsdb1@testtable1@part1=p11/part2=P11 +PREHOOK: Input: partstatsdb1@testtable1@part1=p11/part2=P12 +#### A masked pattern was here #### +POSTHOOK: query: ANALYZE TABLE TestTable1 PARTITION (part1='p11') COMPUTE STATISTICS FOR COLUMNS key +POSTHOOK: type: QUERY +POSTHOOK: Input: partstatsdb1@testtable1 +POSTHOOK: Input: partstatsdb1@testtable1@part1=p11/part2=P11 +POSTHOOK: Input: partstatsdb1@testtable1@part1=p11/part2=P12 +#### A masked pattern was here #### +PREHOOK: query: ANALYZE TABLE TestTable1 PARTITION (part1='p11', Part2='P12') COMPUTE STATISTICS FOR COLUMNS key +PREHOOK: type: QUERY +PREHOOK: Input: partstatsdb1@testtable1 +PREHOOK: Input: partstatsdb1@testtable1@part1=p11/part2=P12 +#### A masked pattern was here #### +POSTHOOK: query: ANALYZE TABLE TestTable1 PARTITION (part1='p11', Part2='P12') COMPUTE STATISTICS FOR COLUMNS key +POSTHOOK: type: QUERY +POSTHOOK: Input: partstatsdb1@testtable1 +POSTHOOK: Input: partstatsdb1@testtable1@part1=p11/part2=P12 +#### A masked pattern was here #### +PREHOOK: query: CREATE TABLE IF NOT EXISTS TESTTABLE2 (key STRING, value STRING) PARTITIONED BY (part1 STRING, Part2 STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: 
database:partstatsdb1 +PREHOOK: Output: partstatsdb1@TESTTABLE2 +POSTHOOK: query: CREATE TABLE IF NOT EXISTS TESTTABLE2 (key STRING, value STRING) PARTITIONED BY (part1 STRING, Part2 STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:partstatsdb1 +POSTHOOK: Output: partstatsdb1@TESTTABLE2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE TESTTABLE2 PARTITION (part1='p11', Part2='P12') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: partstatsdb1@testtable2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE TESTTABLE2 PARTITION (part1='p11', Part2='P12') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: partstatsdb1@testtable2 +POSTHOOK: Output: partstatsdb1@testtable2@part1=p11/part2=P12 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE TESTTABLE2 PARTITION (part1='p21', Part2='P22') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: partstatsdb1@testtable2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE TESTTABLE2 PARTITION (part1='p21', Part2='P22') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: partstatsdb1@testtable2 +POSTHOOK: Output: partstatsdb1@testtable2@part1=p21/part2=P22 +PREHOOK: query: ANALYZE TABLE TESTTABLE2 COMPUTE STATISTICS FOR COLUMNS key +PREHOOK: type: QUERY +PREHOOK: Input: partstatsdb1@testtable2 +PREHOOK: Input: partstatsdb1@testtable2@part1=p11/part2=P12 +PREHOOK: Input: partstatsdb1@testtable2@part1=p21/part2=P22 +#### A masked pattern was here #### +POSTHOOK: query: ANALYZE TABLE TESTTABLE2 COMPUTE STATISTICS FOR COLUMNS key +POSTHOOK: type: QUERY +POSTHOOK: Input: partstatsdb1@testtable2 +POSTHOOK: Input: partstatsdb1@testtable2@part1=p11/part2=P12 +POSTHOOK: Input: partstatsdb1@testtable2@part1=p21/part2=P22 +#### A masked pattern was here #### +PREHOOK: query: ANALYZE TABLE TESTTABLE2 PARTITION (part1='p11', Part2='P12') COMPUTE STATISTICS FOR COLUMNS key +PREHOOK: type: QUERY +PREHOOK: Input: partstatsdb1@testtable2 +PREHOOK: Input: partstatsdb1@testtable2@part1=p11/part2=P12 +#### A masked pattern was here #### +POSTHOOK: query: ANALYZE TABLE TESTTABLE2 PARTITION (part1='p11', Part2='P12') COMPUTE STATISTICS FOR COLUMNS key +POSTHOOK: type: QUERY +POSTHOOK: Input: partstatsdb1@testtable2 +POSTHOOK: Input: partstatsdb1@testtable2@part1=p11/part2=P12 +#### A masked pattern was here #### +PREHOOK: query: ALTER TABLE partstatsdb1.testtable DROP PARTITION (part1='p11', Part2='P12') +PREHOOK: type: ALTERTABLE_DROPPARTS +PREHOOK: Input: partstatsdb1@testtable +PREHOOK: Output: partstatsdb1@testtable@part1=p11/part2=P12 +POSTHOOK: query: ALTER TABLE partstatsdb1.testtable DROP PARTITION (part1='p11', Part2='P12') +POSTHOOK: type: ALTERTABLE_DROPPARTS +POSTHOOK: Input: partstatsdb1@testtable +POSTHOOK: Output: partstatsdb1@testtable@part1=p11/part2=P12 +PREHOOK: query: ALTER TABLE partstatsdb1.TestTable1 DROP PARTITION (part1='p11', Part2='P12') +PREHOOK: type: ALTERTABLE_DROPPARTS +PREHOOK: Input: partstatsdb1@testtable1 +PREHOOK: Output: partstatsdb1@testtable1@part1=p11/part2=P12 +POSTHOOK: query: ALTER TABLE partstatsdb1.TestTable1 DROP PARTITION (part1='p11', Part2='P12') +POSTHOOK: type: ALTERTABLE_DROPPARTS +POSTHOOK: Input: partstatsdb1@testtable1 +POSTHOOK: Output: partstatsdb1@testtable1@part1=p11/part2=P12 +PREHOOK: query: ALTER TABLE partstatsdb1.TESTTABLE2 DROP PARTITION 
(part1='p11', Part2='P12') +PREHOOK: type: ALTERTABLE_DROPPARTS +PREHOOK: Input: partstatsdb1@testtable2 +PREHOOK: Output: partstatsdb1@testtable2@part1=p11/part2=P12 +POSTHOOK: query: ALTER TABLE partstatsdb1.TESTTABLE2 DROP PARTITION (part1='p11', Part2='P12') +POSTHOOK: type: ALTERTABLE_DROPPARTS +POSTHOOK: Input: partstatsdb1@testtable2 +POSTHOOK: Output: partstatsdb1@testtable2@part1=p11/part2=P12 +PREHOOK: query: DROP TABLE partstatsdb1.testtable +PREHOOK: type: DROPTABLE +PREHOOK: Input: partstatsdb1@testtable +PREHOOK: Output: partstatsdb1@testtable +POSTHOOK: query: DROP TABLE partstatsdb1.testtable +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: partstatsdb1@testtable +POSTHOOK: Output: partstatsdb1@testtable +PREHOOK: query: DROP TABLE partstatsdb1.TestTable1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: partstatsdb1@testtable1 +PREHOOK: Output: partstatsdb1@testtable1 +POSTHOOK: query: DROP TABLE partstatsdb1.TestTable1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: partstatsdb1@testtable1 +POSTHOOK: Output: partstatsdb1@testtable1 +PREHOOK: query: DROP TABLE partstatsdb1.TESTTABLE2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: partstatsdb1@testtable2 +PREHOOK: Output: partstatsdb1@testtable2 +POSTHOOK: query: DROP TABLE partstatsdb1.TESTTABLE2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: partstatsdb1@testtable2 +POSTHOOK: Output: partstatsdb1@testtable2 +PREHOOK: query: DROP DATABASE partstatsdb1 +PREHOOK: type: DROPDATABASE +PREHOOK: Input: database:partstatsdb1 +PREHOOK: Output: database:partstatsdb1 +POSTHOOK: query: DROP DATABASE partstatsdb1 +POSTHOOK: type: DROPDATABASE +POSTHOOK: Input: database:partstatsdb1 +POSTHOOK: Output: database:partstatsdb1 +PREHOOK: query: CREATE DATABASE IF NOT EXISTS PARTSTATSDB2 +PREHOOK: type: CREATEDATABASE +PREHOOK: Output: database:PARTSTATSDB2 +POSTHOOK: query: CREATE DATABASE IF NOT EXISTS PARTSTATSDB2 +POSTHOOK: type: CREATEDATABASE +POSTHOOK: Output: database:PARTSTATSDB2 +PREHOOK: query: USE PARTSTATSDB2 +PREHOOK: type: SWITCHDATABASE +PREHOOK: Input: database:partstatsdb2 +POSTHOOK: query: USE PARTSTATSDB2 +POSTHOOK: type: SWITCHDATABASE +POSTHOOK: Input: database:partstatsdb2 +PREHOOK: query: CREATE TABLE IF NOT EXISTS testtable (key STRING, value STRING) PARTITIONED BY (part1 STRING, Part2 STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: PARTSTATSDB2@testtable +PREHOOK: Output: database:partstatsdb2 +POSTHOOK: query: CREATE TABLE IF NOT EXISTS testtable (key STRING, value STRING) PARTITIONED BY (part1 STRING, Part2 STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: PARTSTATSDB2@testtable +POSTHOOK: Output: database:partstatsdb2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE testtable PARTITION (part1='p11', Part2='P12') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: partstatsdb2@testtable +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE testtable PARTITION (part1='p11', Part2='P12') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: partstatsdb2@testtable +POSTHOOK: Output: partstatsdb2@testtable@part1=p11/part2=P12 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE testtable PARTITION (part1='p21', Part2='P22') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: partstatsdb2@testtable +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE testtable PARTITION (part1='p21', Part2='P22') +POSTHOOK: type: 
LOAD +#### A masked pattern was here #### +POSTHOOK: Output: partstatsdb2@testtable +POSTHOOK: Output: partstatsdb2@testtable@part1=p21/part2=P22 +PREHOOK: query: ANALYZE TABLE testtable COMPUTE STATISTICS FOR COLUMNS key +PREHOOK: type: QUERY +PREHOOK: Input: partstatsdb2@testtable +PREHOOK: Input: partstatsdb2@testtable@part1=p11/part2=P12 +PREHOOK: Input: partstatsdb2@testtable@part1=p21/part2=P22 +#### A masked pattern was here #### +POSTHOOK: query: ANALYZE TABLE testtable COMPUTE STATISTICS FOR COLUMNS key +POSTHOOK: type: QUERY +POSTHOOK: Input: partstatsdb2@testtable +POSTHOOK: Input: partstatsdb2@testtable@part1=p11/part2=P12 +POSTHOOK: Input: partstatsdb2@testtable@part1=p21/part2=P22 +#### A masked pattern was here #### +PREHOOK: query: ANALYZE TABLE testtable PARTITION (part1='p11', Part2='P12') COMPUTE STATISTICS FOR COLUMNS key +PREHOOK: type: QUERY +PREHOOK: Input: partstatsdb2@testtable +PREHOOK: Input: partstatsdb2@testtable@part1=p11/part2=P12 +#### A masked pattern was here #### +POSTHOOK: query: ANALYZE TABLE testtable PARTITION (part1='p11', Part2='P12') COMPUTE STATISTICS FOR COLUMNS key +POSTHOOK: type: QUERY +POSTHOOK: Input: partstatsdb2@testtable +POSTHOOK: Input: partstatsdb2@testtable@part1=p11/part2=P12 +#### A masked pattern was here #### +PREHOOK: query: CREATE TABLE IF NOT EXISTS TestTable1 (key STRING, value STRING) PARTITIONED BY (part1 STRING, Part2 STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: PARTSTATSDB2@TestTable1 +PREHOOK: Output: database:partstatsdb2 +POSTHOOK: query: CREATE TABLE IF NOT EXISTS TestTable1 (key STRING, value STRING) PARTITIONED BY (part1 STRING, Part2 STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: PARTSTATSDB2@TestTable1 +POSTHOOK: Output: database:partstatsdb2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE TestTable1 PARTITION (part1='p11', Part2='P11') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: partstatsdb2@testtable1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE TestTable1 PARTITION (part1='p11', Part2='P11') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: partstatsdb2@testtable1 +POSTHOOK: Output: partstatsdb2@testtable1@part1=p11/part2=P11 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE TestTable1 PARTITION (part1='p11', Part2='P12') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: partstatsdb2@testtable1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE TestTable1 PARTITION (part1='p11', Part2='P12') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: partstatsdb2@testtable1 +POSTHOOK: Output: partstatsdb2@testtable1@part1=p11/part2=P12 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE TestTable1 PARTITION (part1='p21', Part2='P22') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: partstatsdb2@testtable1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE TestTable1 PARTITION (part1='p21', Part2='P22') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: partstatsdb2@testtable1 +POSTHOOK: Output: partstatsdb2@testtable1@part1=p21/part2=P22 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE TestTable1 PARTITION (part1='p31', Part2='P32') +PREHOOK: type: LOAD +#### A masked 
pattern was here #### +PREHOOK: Output: partstatsdb2@testtable1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE TestTable1 PARTITION (part1='p31', Part2='P32') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: partstatsdb2@testtable1 +POSTHOOK: Output: partstatsdb2@testtable1@part1=p31/part2=P32 +PREHOOK: query: ANALYZE TABLE TestTable1 COMPUTE STATISTICS FOR COLUMNS key +PREHOOK: type: QUERY +PREHOOK: Input: partstatsdb2@testtable1 +PREHOOK: Input: partstatsdb2@testtable1@part1=p11/part2=P11 +PREHOOK: Input: partstatsdb2@testtable1@part1=p11/part2=P12 +PREHOOK: Input: partstatsdb2@testtable1@part1=p21/part2=P22 +PREHOOK: Input: partstatsdb2@testtable1@part1=p31/part2=P32 +#### A masked pattern was here #### +POSTHOOK: query: ANALYZE TABLE TestTable1 COMPUTE STATISTICS FOR COLUMNS key +POSTHOOK: type: QUERY +POSTHOOK: Input: partstatsdb2@testtable1 +POSTHOOK: Input: partstatsdb2@testtable1@part1=p11/part2=P11 +POSTHOOK: Input: partstatsdb2@testtable1@part1=p11/part2=P12 +POSTHOOK: Input: partstatsdb2@testtable1@part1=p21/part2=P22 +POSTHOOK: Input: partstatsdb2@testtable1@part1=p31/part2=P32 +#### A masked pattern was here #### +PREHOOK: query: ANALYZE TABLE TestTable1 PARTITION (part1='p11') COMPUTE STATISTICS FOR COLUMNS key +PREHOOK: type: QUERY +PREHOOK: Input: partstatsdb2@testtable1 +PREHOOK: Input: partstatsdb2@testtable1@part1=p11/part2=P11 +PREHOOK: Input: partstatsdb2@testtable1@part1=p11/part2=P12 +#### A masked pattern was here #### +POSTHOOK: query: ANALYZE TABLE TestTable1 PARTITION (part1='p11') COMPUTE STATISTICS FOR COLUMNS key +POSTHOOK: type: QUERY +POSTHOOK: Input: partstatsdb2@testtable1 +POSTHOOK: Input: partstatsdb2@testtable1@part1=p11/part2=P11 +POSTHOOK: Input: partstatsdb2@testtable1@part1=p11/part2=P12 +#### A masked pattern was here #### +PREHOOK: query: ANALYZE TABLE TestTable1 PARTITION (part1='p11', Part2='P12') COMPUTE STATISTICS FOR COLUMNS key +PREHOOK: type: QUERY +PREHOOK: Input: partstatsdb2@testtable1 +PREHOOK: Input: partstatsdb2@testtable1@part1=p11/part2=P12 +#### A masked pattern was here #### +POSTHOOK: query: ANALYZE TABLE TestTable1 PARTITION (part1='p11', Part2='P12') COMPUTE STATISTICS FOR COLUMNS key +POSTHOOK: type: QUERY +POSTHOOK: Input: partstatsdb2@testtable1 +POSTHOOK: Input: partstatsdb2@testtable1@part1=p11/part2=P12 +#### A masked pattern was here #### +PREHOOK: query: CREATE TABLE IF NOT EXISTS TESTTABLE2 (key STRING, value STRING) PARTITIONED BY (part1 STRING, Part2 STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: PARTSTATSDB2@TESTTABLE2 +PREHOOK: Output: database:partstatsdb2 +POSTHOOK: query: CREATE TABLE IF NOT EXISTS TESTTABLE2 (key STRING, value STRING) PARTITIONED BY (part1 STRING, Part2 STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: PARTSTATSDB2@TESTTABLE2 +POSTHOOK: Output: database:partstatsdb2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE TESTTABLE2 PARTITION (part1='p11', Part2='P12') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: partstatsdb2@testtable2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE TESTTABLE2 PARTITION (part1='p11', Part2='P12') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: partstatsdb2@testtable2 +POSTHOOK: Output: partstatsdb2@testtable2@part1=p11/part2=P12 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE TESTTABLE2 PARTITION 
(part1='p21', Part2='P22') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: partstatsdb2@testtable2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE TESTTABLE2 PARTITION (part1='p21', Part2='P22') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: partstatsdb2@testtable2 +POSTHOOK: Output: partstatsdb2@testtable2@part1=p21/part2=P22 +PREHOOK: query: ANALYZE TABLE TESTTABLE2 COMPUTE STATISTICS FOR COLUMNS key +PREHOOK: type: QUERY +PREHOOK: Input: partstatsdb2@testtable2 +PREHOOK: Input: partstatsdb2@testtable2@part1=p11/part2=P12 +PREHOOK: Input: partstatsdb2@testtable2@part1=p21/part2=P22 +#### A masked pattern was here #### +POSTHOOK: query: ANALYZE TABLE TESTTABLE2 COMPUTE STATISTICS FOR COLUMNS key +POSTHOOK: type: QUERY +POSTHOOK: Input: partstatsdb2@testtable2 +POSTHOOK: Input: partstatsdb2@testtable2@part1=p11/part2=P12 +POSTHOOK: Input: partstatsdb2@testtable2@part1=p21/part2=P22 +#### A masked pattern was here #### +PREHOOK: query: ANALYZE TABLE TESTTABLE2 PARTITION (part1='p11', Part2='P12') COMPUTE STATISTICS FOR COLUMNS key +PREHOOK: type: QUERY +PREHOOK: Input: partstatsdb2@testtable2 +PREHOOK: Input: partstatsdb2@testtable2@part1=p11/part2=P12 +#### A masked pattern was here #### +POSTHOOK: query: ANALYZE TABLE TESTTABLE2 PARTITION (part1='p11', Part2='P12') COMPUTE STATISTICS FOR COLUMNS key +POSTHOOK: type: QUERY +POSTHOOK: Input: partstatsdb2@testtable2 +POSTHOOK: Input: partstatsdb2@testtable2@part1=p11/part2=P12 +#### A masked pattern was here #### +PREHOOK: query: ALTER TABLE PARTSTATSDB2.testtable DROP PARTITION (part1='p11', Part2='P12') +PREHOOK: type: ALTERTABLE_DROPPARTS +PREHOOK: Input: partstatsdb2@testtable +PREHOOK: Output: partstatsdb2@testtable@part1=p11/part2=P12 +POSTHOOK: query: ALTER TABLE PARTSTATSDB2.testtable DROP PARTITION (part1='p11', Part2='P12') +POSTHOOK: type: ALTERTABLE_DROPPARTS +POSTHOOK: Input: partstatsdb2@testtable +POSTHOOK: Output: partstatsdb2@testtable@part1=p11/part2=P12 +PREHOOK: query: ALTER TABLE PARTSTATSDB2.TestTable1 DROP PARTITION (part1='p11', Part2='P12') +PREHOOK: type: ALTERTABLE_DROPPARTS +PREHOOK: Input: partstatsdb2@testtable1 +PREHOOK: Output: partstatsdb2@testtable1@part1=p11/part2=P12 +POSTHOOK: query: ALTER TABLE PARTSTATSDB2.TestTable1 DROP PARTITION (part1='p11', Part2='P12') +POSTHOOK: type: ALTERTABLE_DROPPARTS +POSTHOOK: Input: partstatsdb2@testtable1 +POSTHOOK: Output: partstatsdb2@testtable1@part1=p11/part2=P12 +PREHOOK: query: ALTER TABLE PARTSTATSDB2.TESTTABLE2 DROP PARTITION (part1='p11', Part2='P12') +PREHOOK: type: ALTERTABLE_DROPPARTS +PREHOOK: Input: partstatsdb2@testtable2 +PREHOOK: Output: partstatsdb2@testtable2@part1=p11/part2=P12 +POSTHOOK: query: ALTER TABLE PARTSTATSDB2.TESTTABLE2 DROP PARTITION (part1='p11', Part2='P12') +POSTHOOK: type: ALTERTABLE_DROPPARTS +POSTHOOK: Input: partstatsdb2@testtable2 +POSTHOOK: Output: partstatsdb2@testtable2@part1=p11/part2=P12 +PREHOOK: query: DROP TABLE PARTSTATSDB2.testtable +PREHOOK: type: DROPTABLE +PREHOOK: Input: partstatsdb2@testtable +PREHOOK: Output: partstatsdb2@testtable +POSTHOOK: query: DROP TABLE PARTSTATSDB2.testtable +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: partstatsdb2@testtable +POSTHOOK: Output: partstatsdb2@testtable +PREHOOK: query: DROP TABLE PARTSTATSDB2.TestTable1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: partstatsdb2@testtable1 +PREHOOK: Output: partstatsdb2@testtable1 +POSTHOOK: query: DROP TABLE PARTSTATSDB2.TestTable1 +POSTHOOK: type: 
DROPTABLE +POSTHOOK: Input: partstatsdb2@testtable1 +POSTHOOK: Output: partstatsdb2@testtable1 +PREHOOK: query: DROP TABLE PARTSTATSDB2.TESTTABLE2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: partstatsdb2@testtable2 +PREHOOK: Output: partstatsdb2@testtable2 +POSTHOOK: query: DROP TABLE PARTSTATSDB2.TESTTABLE2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: partstatsdb2@testtable2 +POSTHOOK: Output: partstatsdb2@testtable2 +PREHOOK: query: DROP DATABASE PARTSTATSDB2 +PREHOOK: type: DROPDATABASE +PREHOOK: Input: database:partstatsdb2 +PREHOOK: Output: database:partstatsdb2 +POSTHOOK: query: DROP DATABASE PARTSTATSDB2 +POSTHOOK: type: DROPDATABASE +POSTHOOK: Input: database:partstatsdb2 +POSTHOOK: Output: database:partstatsdb2 diff --git a/ql/src/test/results/clientpositive/drop_table_with_stats.q.out b/ql/src/test/results/clientpositive/drop_table_with_stats.q.out new file mode 100644 index 0000000..fbc3ab9 --- /dev/null +++ b/ql/src/test/results/clientpositive/drop_table_with_stats.q.out @@ -0,0 +1,236 @@ +PREHOOK: query: -- This test verifies that a table could be dropped with columns stats computed +-- The column stats for table without partition will go to TAB_COL_STATS +CREATE DATABASE IF NOT EXISTS tblstatsdb1 +PREHOOK: type: CREATEDATABASE +PREHOOK: Output: database:tblstatsdb1 +POSTHOOK: query: -- This test verifies that a table could be dropped with columns stats computed +-- The column stats for table without partition will go to TAB_COL_STATS +CREATE DATABASE IF NOT EXISTS tblstatsdb1 +POSTHOOK: type: CREATEDATABASE +POSTHOOK: Output: database:tblstatsdb1 +PREHOOK: query: USE tblstatsdb1 +PREHOOK: type: SWITCHDATABASE +PREHOOK: Input: database:tblstatsdb1 +POSTHOOK: query: USE tblstatsdb1 +POSTHOOK: type: SWITCHDATABASE +POSTHOOK: Input: database:tblstatsdb1 +PREHOOK: query: CREATE TABLE IF NOT EXISTS testtable (key STRING, value STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:tblstatsdb1 +PREHOOK: Output: tblstatsdb1@testtable +POSTHOOK: query: CREATE TABLE IF NOT EXISTS testtable (key STRING, value STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:tblstatsdb1 +POSTHOOK: Output: tblstatsdb1@testtable +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE testtable +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: tblstatsdb1@testtable +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE testtable +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: tblstatsdb1@testtable +PREHOOK: query: ANALYZE TABLE testtable COMPUTE STATISTICS FOR COLUMNS key +PREHOOK: type: QUERY +PREHOOK: Input: tblstatsdb1@testtable +#### A masked pattern was here #### +POSTHOOK: query: ANALYZE TABLE testtable COMPUTE STATISTICS FOR COLUMNS key +POSTHOOK: type: QUERY +POSTHOOK: Input: tblstatsdb1@testtable +#### A masked pattern was here #### +PREHOOK: query: CREATE TABLE IF NOT EXISTS TestTable1 (key STRING, value STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:tblstatsdb1 +PREHOOK: Output: tblstatsdb1@TestTable1 +POSTHOOK: query: CREATE TABLE IF NOT EXISTS TestTable1 (key STRING, value STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:tblstatsdb1 +POSTHOOK: Output: tblstatsdb1@TestTable1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE TestTable1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: tblstatsdb1@testtable1 +POSTHOOK: query: LOAD DATA LOCAL INPATH 
'../../data/files/kv1.txt' OVERWRITE INTO TABLE TestTable1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: tblstatsdb1@testtable1 +PREHOOK: query: ANALYZE TABLE TestTable1 COMPUTE STATISTICS FOR COLUMNS key +PREHOOK: type: QUERY +PREHOOK: Input: tblstatsdb1@testtable1 +#### A masked pattern was here #### +POSTHOOK: query: ANALYZE TABLE TestTable1 COMPUTE STATISTICS FOR COLUMNS key +POSTHOOK: type: QUERY +POSTHOOK: Input: tblstatsdb1@testtable1 +#### A masked pattern was here #### +PREHOOK: query: CREATE TABLE IF NOT EXISTS TESTTABLE2 (key STRING, value STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:tblstatsdb1 +PREHOOK: Output: tblstatsdb1@TESTTABLE2 +POSTHOOK: query: CREATE TABLE IF NOT EXISTS TESTTABLE2 (key STRING, value STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:tblstatsdb1 +POSTHOOK: Output: tblstatsdb1@TESTTABLE2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE TESTTABLE2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: tblstatsdb1@testtable2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE TESTTABLE2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: tblstatsdb1@testtable2 +PREHOOK: query: ANALYZE TABLE TESTTABLE2 COMPUTE STATISTICS FOR COLUMNS key +PREHOOK: type: QUERY +PREHOOK: Input: tblstatsdb1@testtable2 +#### A masked pattern was here #### +POSTHOOK: query: ANALYZE TABLE TESTTABLE2 COMPUTE STATISTICS FOR COLUMNS key +POSTHOOK: type: QUERY +POSTHOOK: Input: tblstatsdb1@testtable2 +#### A masked pattern was here #### +PREHOOK: query: DROP TABLE tblstatsdb1.testtable +PREHOOK: type: DROPTABLE +PREHOOK: Input: tblstatsdb1@testtable +PREHOOK: Output: tblstatsdb1@testtable +POSTHOOK: query: DROP TABLE tblstatsdb1.testtable +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: tblstatsdb1@testtable +POSTHOOK: Output: tblstatsdb1@testtable +PREHOOK: query: DROP TABLE tblstatsdb1.TestTable1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: tblstatsdb1@testtable1 +PREHOOK: Output: tblstatsdb1@testtable1 +POSTHOOK: query: DROP TABLE tblstatsdb1.TestTable1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: tblstatsdb1@testtable1 +POSTHOOK: Output: tblstatsdb1@testtable1 +PREHOOK: query: DROP TABLE tblstatsdb1.TESTTABLE2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: tblstatsdb1@testtable2 +PREHOOK: Output: tblstatsdb1@testtable2 +POSTHOOK: query: DROP TABLE tblstatsdb1.TESTTABLE2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: tblstatsdb1@testtable2 +POSTHOOK: Output: tblstatsdb1@testtable2 +PREHOOK: query: DROP DATABASE tblstatsdb1 +PREHOOK: type: DROPDATABASE +PREHOOK: Input: database:tblstatsdb1 +PREHOOK: Output: database:tblstatsdb1 +POSTHOOK: query: DROP DATABASE tblstatsdb1 +POSTHOOK: type: DROPDATABASE +POSTHOOK: Input: database:tblstatsdb1 +POSTHOOK: Output: database:tblstatsdb1 +PREHOOK: query: CREATE DATABASE IF NOT EXISTS TBLSTATSDB2 +PREHOOK: type: CREATEDATABASE +PREHOOK: Output: database:TBLSTATSDB2 +POSTHOOK: query: CREATE DATABASE IF NOT EXISTS TBLSTATSDB2 +POSTHOOK: type: CREATEDATABASE +POSTHOOK: Output: database:TBLSTATSDB2 +PREHOOK: query: USE TBLSTATSDB2 +PREHOOK: type: SWITCHDATABASE +PREHOOK: Input: database:tblstatsdb2 +POSTHOOK: query: USE TBLSTATSDB2 +POSTHOOK: type: SWITCHDATABASE +POSTHOOK: Input: database:tblstatsdb2 +PREHOOK: query: CREATE TABLE IF NOT EXISTS testtable (key STRING, value STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: TBLSTATSDB2@testtable +PREHOOK: Output: 
database:tblstatsdb2 +POSTHOOK: query: CREATE TABLE IF NOT EXISTS testtable (key STRING, value STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: TBLSTATSDB2@testtable +POSTHOOK: Output: database:tblstatsdb2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE testtable +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: tblstatsdb2@testtable +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE testtable +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: tblstatsdb2@testtable +PREHOOK: query: ANALYZE TABLE testtable COMPUTE STATISTICS FOR COLUMNS key +PREHOOK: type: QUERY +PREHOOK: Input: tblstatsdb2@testtable +#### A masked pattern was here #### +POSTHOOK: query: ANALYZE TABLE testtable COMPUTE STATISTICS FOR COLUMNS key +POSTHOOK: type: QUERY +POSTHOOK: Input: tblstatsdb2@testtable +#### A masked pattern was here #### +PREHOOK: query: CREATE TABLE IF NOT EXISTS TestTable1 (key STRING, value STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: TBLSTATSDB2@TestTable1 +PREHOOK: Output: database:tblstatsdb2 +POSTHOOK: query: CREATE TABLE IF NOT EXISTS TestTable1 (key STRING, value STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: TBLSTATSDB2@TestTable1 +POSTHOOK: Output: database:tblstatsdb2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE TestTable1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: tblstatsdb2@testtable1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE TestTable1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: tblstatsdb2@testtable1 +PREHOOK: query: ANALYZE TABLE TestTable1 COMPUTE STATISTICS FOR COLUMNS key +PREHOOK: type: QUERY +PREHOOK: Input: tblstatsdb2@testtable1 +#### A masked pattern was here #### +POSTHOOK: query: ANALYZE TABLE TestTable1 COMPUTE STATISTICS FOR COLUMNS key +POSTHOOK: type: QUERY +POSTHOOK: Input: tblstatsdb2@testtable1 +#### A masked pattern was here #### +PREHOOK: query: CREATE TABLE IF NOT EXISTS TESTTABLE2 (key STRING, value STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: TBLSTATSDB2@TESTTABLE2 +PREHOOK: Output: database:tblstatsdb2 +POSTHOOK: query: CREATE TABLE IF NOT EXISTS TESTTABLE2 (key STRING, value STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: TBLSTATSDB2@TESTTABLE2 +POSTHOOK: Output: database:tblstatsdb2 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE TESTTABLE2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: tblstatsdb2@testtable2 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE TESTTABLE2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: tblstatsdb2@testtable2 +PREHOOK: query: ANALYZE TABLE TESTTABLE2 COMPUTE STATISTICS FOR COLUMNS key +PREHOOK: type: QUERY +PREHOOK: Input: tblstatsdb2@testtable2 +#### A masked pattern was here #### +POSTHOOK: query: ANALYZE TABLE TESTTABLE2 COMPUTE STATISTICS FOR COLUMNS key +POSTHOOK: type: QUERY +POSTHOOK: Input: tblstatsdb2@testtable2 +#### A masked pattern was here #### +PREHOOK: query: DROP TABLE TBLSTATSDB2.testtable +PREHOOK: type: DROPTABLE +PREHOOK: Input: tblstatsdb2@testtable +PREHOOK: Output: tblstatsdb2@testtable +POSTHOOK: query: DROP TABLE TBLSTATSDB2.testtable +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: tblstatsdb2@testtable +POSTHOOK: Output: tblstatsdb2@testtable 
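The drop_table_with_stats.q.out hunks above and below all exercise a single pattern — compute column statistics on an unpartitioned table, then drop it — to verify that the metastore also removes the corresponding TAB_COL_STATS rows. A minimal HiveQL sketch of that pattern (the database and table names here are illustrative, not the test's own):

  CREATE DATABASE IF NOT EXISTS statsdemo;
  USE statsdemo;
  CREATE TABLE t (key STRING, value STRING);
  LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' OVERWRITE INTO TABLE t;
  -- column stats for an unpartitioned table are persisted in TAB_COL_STATS
  ANALYZE TABLE t COMPUTE STATISTICS FOR COLUMNS key;
  -- the drop must succeed and clean up the stats rows along with the table
  DROP TABLE t;
  DROP DATABASE statsdemo;

The partitioned variant earlier in this patch (drop_partition_with_stats.q.out) is the same sketch with PARTITIONED BY columns and ALTER TABLE ... DROP PARTITION, where the stats land in PART_COL_STATS instead.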
+PREHOOK: query: DROP TABLE TBLSTATSDB2.TestTable1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: tblstatsdb2@testtable1 +PREHOOK: Output: tblstatsdb2@testtable1 +POSTHOOK: query: DROP TABLE TBLSTATSDB2.TestTable1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: tblstatsdb2@testtable1 +POSTHOOK: Output: tblstatsdb2@testtable1 +PREHOOK: query: DROP TABLE TBLSTATSDB2.TESTTABLE2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: tblstatsdb2@testtable2 +PREHOOK: Output: tblstatsdb2@testtable2 +POSTHOOK: query: DROP TABLE TBLSTATSDB2.TESTTABLE2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: tblstatsdb2@testtable2 +POSTHOOK: Output: tblstatsdb2@testtable2 +PREHOOK: query: DROP DATABASE TBLSTATSDB2 +PREHOOK: type: DROPDATABASE +PREHOOK: Input: database:tblstatsdb2 +PREHOOK: Output: database:tblstatsdb2 +POSTHOOK: query: DROP DATABASE TBLSTATSDB2 +POSTHOOK: type: DROPDATABASE +POSTHOOK: Input: database:tblstatsdb2 +POSTHOOK: Output: database:tblstatsdb2 diff --git a/ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out b/ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out index b21cb11..87dddc1 100644 --- a/ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out +++ b/ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out @@ -1535,18 +1535,14 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 + File Output Operator + compressed: false Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.over1k_part2_orc + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part2_orc Stage: Stage-0 Move Operator @@ -1606,17 +1602,13 @@ STAGE PLANS: keys: KEY._col0 (type: smallint), KEY._col1 (type: int), KEY._col2 (type: bigint), KEY._col3 (type: float), KEY._col4 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -1626,14 +1618,14 @@ STAGE PLANS: key expressions: _col4 (type: tinyint) sort order: + Map-reduce partition columns: _col4 (type: tinyint) - Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) Reduce Operator Tree: Extract - Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git a/ql/src/test/results/clientpositive/dynpart_sort_optimization.q.out b/ql/src/test/results/clientpositive/dynpart_sort_optimization.q.out index f4b1013..cdb783e 100644 --- a/ql/src/test/results/clientpositive/dynpart_sort_optimization.q.out +++ b/ql/src/test/results/clientpositive/dynpart_sort_optimization.q.out @@ -1436,18 +1436,14 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 + File Output Operator + compressed: false Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.over1k_part2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part2 Stage: Stage-0 Move Operator @@ -1506,17 +1502,13 @@ STAGE PLANS: keys: KEY._col0 (type: smallint), KEY._col1 (type: int), KEY._col2 (type: bigint), KEY._col3 (type: float), KEY._col4 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE 
Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -1526,14 +1518,14 @@ STAGE PLANS: key expressions: _col4 (type: tinyint) sort order: + Map-reduce partition columns: _col4 (type: tinyint) - Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) Reduce Operator Tree: Extract - Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git a/ql/src/test/results/clientpositive/dynpart_sort_optimization2.q.out b/ql/src/test/results/clientpositive/dynpart_sort_optimization2.q.out index 5ee36f7..8c58880 100644 --- a/ql/src/test/results/clientpositive/dynpart_sort_optimization2.q.out +++ b/ql/src/test/results/clientpositive/dynpart_sort_optimization2.q.out @@ -87,20 +87,16 @@ STAGE PLANS: Filter Operator predicate: ((ss_sold_date_sk >= 2452617) and (ss_sold_date_sk <= 2452638)) (type: boolean) Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ss_sold_date_sk (type: int), ss_net_paid_inc_tax (type: float), ss_net_profit (type: float) - outputColumnNames: ss_sold_date_sk, ss_net_paid_inc_tax, ss_net_profit + Group By Operator + keys: ss_sold_date_sk (type: int), ss_net_paid_inc_tax (type: float), ss_net_profit (type: float) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: ss_sold_date_sk (type: int), ss_net_paid_inc_tax (type: float), ss_net_profit (type: float) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: float), _col2 (type: float) + sort order: +++ + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: float), _col2 (type: float) - sort order: +++ - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int), KEY._col1 (type: float), KEY._col2 (type: float) @@ -554,20 +550,16 @@ STAGE PLANS: Filter Operator predicate: ((ss_sold_date_sk >= 2452617) and (ss_sold_date_sk <= 2452638)) (type: boolean) Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ss_sold_date_sk (type: int), ss_net_paid_inc_tax (type: float), ss_net_profit (type: float) - outputColumnNames: ss_sold_date_sk, ss_net_paid_inc_tax, ss_net_profit + Group By Operator + keys: 
ss_sold_date_sk (type: int), ss_net_paid_inc_tax (type: float), ss_net_profit (type: float) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: ss_sold_date_sk (type: int), ss_net_paid_inc_tax (type: float), ss_net_profit (type: float) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: float), _col2 (type: float) + sort order: +++ + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: float), _col2 (type: float) - sort order: +++ - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int), KEY._col1 (type: float), KEY._col2 (type: float) @@ -1075,20 +1067,16 @@ STAGE PLANS: Filter Operator predicate: ((ss_sold_date_sk >= 2452617) and (ss_sold_date_sk <= 2452638)) (type: boolean) Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ss_sold_date_sk (type: int), ss_net_paid_inc_tax (type: float), ss_net_profit (type: float) - outputColumnNames: ss_sold_date_sk, ss_net_paid_inc_tax, ss_net_profit + Group By Operator + keys: ss_sold_date_sk (type: int), ss_net_paid_inc_tax (type: float), ss_net_profit (type: float) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: ss_sold_date_sk (type: int), ss_net_paid_inc_tax (type: float), ss_net_profit (type: float) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: float), _col2 (type: float) + sort order: +++ + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: float), _col2 (type: float) - sort order: +++ - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out b/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out index 8ebbdc3..619200d 100644 --- a/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out +++ b/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out @@ -1057,9 +1057,8 @@ POSTHOOK: query: explain update acid set key = 'foo' where value = 'bar' and ds= POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 @@ -1075,6 +1074,7 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: struct) sort order: + + Map-reduce partition columns: UDFToInteger(_col0) (type: int) value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int) Reduce Operator Tree: Select Operator @@ -1083,24 +1083,6 @@ STAGE PLANS: File Output Operator compressed: 
false table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col3 (type: string), _col4 (type: int), '_bucket_number' (type: string), _col0 (type: struct) - sort order: ++++ - Map-reduce partition columns: _col3 (type: string), _col4 (type: int) - value expressions: _col0 (type: struct), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), '_bucket_number' (type: string) - Reduce Operator Tree: - Extract - File Output Operator - compressed: false - table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -1119,7 +1101,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.acid - Stage: Stage-3 + Stage: Stage-2 Stats-Aggr Operator PREHOOK: query: update acid set key = 'foo' where value = 'bar' and ds='2008-04-08' and hr=11 @@ -1149,9 +1131,8 @@ POSTHOOK: query: explain update acid set key = 'foo' where value = 'bar' and ds= POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 @@ -1167,6 +1148,7 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: struct) sort order: + + Map-reduce partition columns: UDFToInteger(_col0) (type: int) value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int) Reduce Operator Tree: Select Operator @@ -1175,24 +1157,6 @@ STAGE PLANS: File Output Operator compressed: false table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col3 (type: string), _col4 (type: int), '_bucket_number' (type: string), _col0 (type: struct) - sort order: ++++ - Map-reduce partition columns: _col3 (type: string), _col4 (type: int) - value expressions: _col0 (type: struct), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), '_bucket_number' (type: string) - Reduce Operator Tree: - Extract - File Output Operator - compressed: false - table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde @@ -1211,7 +1175,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.acid - Stage: Stage-3 + Stage: Stage-2 Stats-Aggr Operator PREHOOK: query: update acid set key = 'foo' where value = 'bar' and ds='2008-04-08' and hr>=11 diff --git a/ql/src/test/results/clientpositive/explain_dependency.q.out b/ql/src/test/results/clientpositive/explain_dependency.q.out index b9a8ed8..617ca50 100644 --- a/ql/src/test/results/clientpositive/explain_dependency.q.out +++ b/ql/src/test/results/clientpositive/explain_dependency.q.out @@ -95,7 +95,7 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN DEPENDENCY SELECT S1.key, S2.value FROM src S1 JOIN 
srcpart S2 ON S1.key = S2.key WHERE ds IS NOT NULL POSTHOOK: type: QUERY -{"input_partitions":[{"partitionName":"default@srcpart@ds=2008-04-08/hr=11"},{"partitionName":"default@srcpart@ds=2008-04-08/hr=12"},{"partitionName":"default@srcpart@ds=2008-04-09/hr=11"},{"partitionName":"default@srcpart@ds=2008-04-09/hr=12"}],"input_tables":[{"tablename":"default@srcpart","tabletype":"MANAGED_TABLE"},{"tablename":"default@src","tabletype":"MANAGED_TABLE"}]} +{"input_partitions":[{"partitionName":"default@srcpart@ds=2008-04-08/hr=11"},{"partitionName":"default@srcpart@ds=2008-04-08/hr=12"},{"partitionName":"default@srcpart@ds=2008-04-09/hr=11"},{"partitionName":"default@srcpart@ds=2008-04-09/hr=12"}],"input_tables":[{"tablename":"default@src","tabletype":"MANAGED_TABLE"},{"tablename":"default@srcpart","tabletype":"MANAGED_TABLE"}]} PREHOOK: query: -- With views EXPLAIN DEPENDENCY SELECT * FROM V1 PREHOOK: type: QUERY @@ -112,12 +112,12 @@ PREHOOK: query: EXPLAIN DEPENDENCY SELECT * FROM V3 PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN DEPENDENCY SELECT * FROM V3 POSTHOOK: type: QUERY -{"input_partitions":[{"partitionParents":"[default@v2]","partitionName":"default@srcpart@ds=2008-04-08/hr=11"},{"partitionParents":"[default@v2]","partitionName":"default@srcpart@ds=2008-04-08/hr=12"},{"partitionParents":"[default@v2]","partitionName":"default@srcpart@ds=2008-04-09/hr=11"},{"partitionParents":"[default@v2]","partitionName":"default@srcpart@ds=2008-04-09/hr=12"}],"input_tables":[{"tablename":"default@v3","tabletype":"VIRTUAL_VIEW"},{"tablename":"default@src","tabletype":"MANAGED_TABLE","tableParents":"[default@v3]"},{"tablename":"default@v2","tabletype":"VIRTUAL_VIEW","tableParents":"[default@v3]"},{"tablename":"default@srcpart","tabletype":"MANAGED_TABLE","tableParents":"[default@v2]"}]} +{"input_partitions":[{"partitionParents":"[default@v2]","partitionName":"default@srcpart@ds=2008-04-08/hr=11"},{"partitionParents":"[default@v2]","partitionName":"default@srcpart@ds=2008-04-08/hr=12"},{"partitionParents":"[default@v2]","partitionName":"default@srcpart@ds=2008-04-09/hr=11"},{"partitionParents":"[default@v2]","partitionName":"default@srcpart@ds=2008-04-09/hr=12"}],"input_tables":[{"tablename":"default@v3","tabletype":"VIRTUAL_VIEW"},{"tablename":"default@v2","tabletype":"VIRTUAL_VIEW","tableParents":"[default@v3]"},{"tablename":"default@src","tabletype":"MANAGED_TABLE","tableParents":"[default@v3]"},{"tablename":"default@srcpart","tabletype":"MANAGED_TABLE","tableParents":"[default@v2]"}]} PREHOOK: query: EXPLAIN DEPENDENCY SELECT * FROM V4 PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN DEPENDENCY SELECT * FROM V4 POSTHOOK: type: QUERY -{"input_partitions":[{"partitionParents":"[default@v2]","partitionName":"default@srcpart@ds=2008-04-08/hr=11"},{"partitionParents":"[default@v2]","partitionName":"default@srcpart@ds=2008-04-08/hr=12"},{"partitionParents":"[default@v2]","partitionName":"default@srcpart@ds=2008-04-09/hr=11"},{"partitionParents":"[default@v2]","partitionName":"default@srcpart@ds=2008-04-09/hr=12"}],"input_tables":[{"tablename":"default@v4","tabletype":"VIRTUAL_VIEW"},{"tablename":"default@v2","tabletype":"VIRTUAL_VIEW","tableParents":"[default@v4]"},{"tablename":"default@src","tabletype":"MANAGED_TABLE","tableParents":"[default@v4, default@v1]"},{"tablename":"default@v1","tabletype":"VIRTUAL_VIEW","tableParents":"[default@v4]"},{"tablename":"default@srcpart","tabletype":"MANAGED_TABLE","tableParents":"[default@v2]"}]} 
+{"input_partitions":[{"partitionParents":"[default@v2]","partitionName":"default@srcpart@ds=2008-04-08/hr=11"},{"partitionParents":"[default@v2]","partitionName":"default@srcpart@ds=2008-04-08/hr=12"},{"partitionParents":"[default@v2]","partitionName":"default@srcpart@ds=2008-04-09/hr=11"},{"partitionParents":"[default@v2]","partitionName":"default@srcpart@ds=2008-04-09/hr=12"}],"input_tables":[{"tablename":"default@v4","tabletype":"VIRTUAL_VIEW"},{"tablename":"default@v1","tabletype":"VIRTUAL_VIEW","tableParents":"[default@v4]"},{"tablename":"default@v2","tabletype":"VIRTUAL_VIEW","tableParents":"[default@v4]"},{"tablename":"default@src","tabletype":"MANAGED_TABLE","tableParents":"[default@v4, default@v1]"},{"tablename":"default@srcpart","tabletype":"MANAGED_TABLE","tableParents":"[default@v2]"}]} PREHOOK: query: -- The table should show up in the explain dependency even if none -- of the partitions are selected. CREATE VIEW V5 as SELECT * FROM srcpart where ds = '10' diff --git a/ql/src/test/results/clientpositive/explain_logical.q.out b/ql/src/test/results/clientpositive/explain_logical.q.out index 5a9cc2a..ec2fd20 100644 --- a/ql/src/test/results/clientpositive/explain_logical.q.out +++ b/ql/src/test/results/clientpositive/explain_logical.q.out @@ -370,10 +370,10 @@ TOK_QUERY LOGICAL PLAN: s1 - TableScan (TS_1) + TableScan (TS_0) alias: s1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator (FIL_11) + Filter Operator (FIL_10) predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator (RS_3) @@ -401,10 +401,10 @@ s1 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe s2 - TableScan (TS_0) + TableScan (TS_1) alias: s2 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Filter Operator (FIL_10) + Filter Operator (FIL_11) predicate: key is not null (type: boolean) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator (RS_5) @@ -511,7 +511,7 @@ v3:src1:srcpart TableScan (TS_0) alias: srcpart Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Filter Operator (FIL_14) + Filter Operator (FIL_13) predicate: key is not null (type: boolean) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Select Operator (SEL_2) @@ -546,7 +546,7 @@ v3:src2 TableScan (TS_3) alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator (FIL_13) + Filter Operator (FIL_14) predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator (RS_7) @@ -586,13 +586,13 @@ TOK_QUERY LOGICAL PLAN: v4:src1:src - TableScan (TS_3) + TableScan (TS_0) alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator (FIL_16) predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator (SEL_4) + Select Operator (SEL_1) expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE @@ -623,13 +623,13 @@ v4:src1:src output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe v4:src2:srcpart - TableScan (TS_0) + TableScan (TS_2) alias: srcpart Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Filter Operator (FIL_18) + Filter Operator (FIL_17) predicate: key is not null (type: boolean) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Select Operator (SEL_2) + Select Operator (SEL_4) expressions: key (type: string), value (type: string) outputColumnNames: _col1, _col2 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE @@ -653,7 +653,7 @@ v4:src3 TableScan (TS_5) alias: src3 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator (FIL_17) + Filter Operator (FIL_18) predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator (RS_11) @@ -800,74 +800,66 @@ s1:src TableScan (TS_0) alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator (FIL_17) + Filter Operator (FIL_16) predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator (SEL_1) - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Group By Operator (GBY_2) + aggregations: count(value) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Group By Operator (GBY_2) - aggregations: count(value) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator (RS_3) + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator (RS_3) - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: bigint) + Group By Operator (GBY_4) + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Group By Operator (GBY_4) - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Select Operator (SEL_5) - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator (RS_8) - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Join Operator (JOIN_11) - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col3 + Reduce Output Operator (RS_8) + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value 
expressions: _col1 (type: bigint) + Join Operator (JOIN_11) + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator (SEL_12) + expressions: _col0 (type: string), _col1 (type: bigint), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator (RS_13) + key expressions: _col0 (type: string) + sort order: + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator (SEL_12) - expressions: _col0 (type: string), _col1 (type: bigint), _col3 (type: string) + value expressions: _col1 (type: bigint), _col2 (type: string) + Select Operator (SEL_14) + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator (RS_13) - key expressions: _col0 (type: string) - sort order: + + File Output Operator (FS_15) + compressed: false Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint), _col2 (type: string) - Select Operator (SEL_14) - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator (FS_15) - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe s2 TableScan (TS_6) alias: s2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator (FIL_16) + Filter Operator (FIL_17) predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator (RS_10) diff --git a/ql/src/test/results/clientpositive/explain_rearrange.q.out b/ql/src/test/results/clientpositive/explain_rearrange.q.out index 7bbad10..b251e9c 100644 --- a/ql/src/test/results/clientpositive/explain_rearrange.q.out +++ b/ql/src/test/results/clientpositive/explain_rearrange.q.out @@ -60,15 +60,15 @@ order by src1.key, src1.cnt1, src2.cnt1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage [MAPRED] - Stage-9 depends on stages: Stage-1, Stage-5 , consists of Stage-10, Stage-11, Stage-3 [CONDITIONAL] - Stage-10 has a backup stage: Stage-3 [MAPREDLOCAL] + Stage-9 depends on stages: Stage-1, Stage-5 , consists of Stage-10, Stage-11, Stage-2 [CONDITIONAL] + Stage-10 has a backup stage: Stage-2 [MAPREDLOCAL] Stage-7 depends on stages: Stage-10 [MAPRED] - Stage-4 depends on stages: Stage-3, Stage-7, Stage-8 [MAPRED] - Stage-11 has a backup stage: Stage-3 [MAPREDLOCAL] + Stage-3 depends on stages: Stage-2, Stage-7, Stage-8 [MAPRED] + Stage-11 has a backup stage: Stage-2 
[MAPREDLOCAL] Stage-8 depends on stages: Stage-11 [MAPRED] - Stage-3 [MAPRED] + Stage-2 [MAPRED] Stage-5 is a root stage [MAPRED] - Stage-0 depends on stages: Stage-4 [FETCH] + Stage-0 depends on stages: Stage-3 [FETCH] STAGE PLANS: Stage: Stage-1 @@ -90,34 +90,28 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Group By Operator - aggregations: count() - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: bigint) + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: int), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-9 Conditional Operator @@ -165,7 +159,7 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -226,7 +220,7 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-3 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -278,34 +272,28 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Group By Operator - aggregations: count() - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: bigint) + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: int), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator @@ -385,34 +373,28 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Group By Operator - aggregations: count() - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: bigint) + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: int), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Conditional Operator @@ -573,34 +555,28 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Group By Operator - aggregations: count() - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: bigint) + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: int), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-10 Fetch Operator @@ -680,34 +656,28 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Group By Operator - aggregations: count() - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - 
sort order: + - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: bigint) + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: int), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -728,34 +698,28 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Group By Operator - aggregations: count() - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: bigint) + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: int), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Conditional Operator @@ -975,34 +939,28 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Group By Operator - aggregations: count() - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: bigint) + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: 
count(VALUE._col0) keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: int), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -1023,34 +981,28 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Group By Operator - aggregations: count() - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: bigint) + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: int), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Conditional Operator diff --git a/ql/src/test/results/clientpositive/gby_star.q.out b/ql/src/test/results/clientpositive/gby_star.q.out index 3021171..eeafa3d 100644 --- a/ql/src/test/results/clientpositive/gby_star.q.out +++ b/ql/src/test/results/clientpositive/gby_star.q.out @@ -97,22 +97,18 @@ STAGE PLANS: Filter Operator predicate: (key < 100) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Group By Operator + aggregations: sum(key) + keys: key (type: string), value (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(key) - keys: key (type: string), value (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 
(type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: double) + value expressions: _col2 (type: double) Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -293,22 +289,18 @@ STAGE PLANS: 1 {KEY.reducesinkkey0} outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) + Group By Operator + aggregations: sum(_col1) + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce diff --git a/ql/src/test/results/clientpositive/groupby1_limit.q.out b/ql/src/test/results/clientpositive/groupby1_limit.q.out index 1759fc7..68c262f 100644 --- a/ql/src/test/results/clientpositive/groupby1_limit.q.out +++ b/ql/src/test/results/clientpositive/groupby1_limit.q.out @@ -52,19 +52,15 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 5 - Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce diff --git a/ql/src/test/results/clientpositive/groupby2_limit.q.out b/ql/src/test/results/clientpositive/groupby2_limit.q.out index 855bd63..8f185a3 100644 --- a/ql/src/test/results/clientpositive/groupby2_limit.q.out +++ b/ql/src/test/results/clientpositive/groupby2_limit.q.out @@ -38,17 +38,13 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: double) - outputColumnNames: _col0, _col1 - 
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -57,13 +53,13 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: double) Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: double) outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 5 Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/groupby4.q.out b/ql/src/test/results/clientpositive/groupby4.q.out index 8b46eab..9b9cee1 100644 --- a/ql/src/test/results/clientpositive/groupby4.q.out +++ b/ql/src/test/results/clientpositive/groupby4.q.out @@ -68,18 +68,14 @@ STAGE PLANS: mode: final outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + File Output Operator + compressed: false Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 Stage: Stage-0 Move Operator diff --git a/ql/src/test/results/clientpositive/groupby4_noskew.q.out b/ql/src/test/results/clientpositive/groupby4_noskew.q.out index d8a7af7..625f6a1 100644 --- a/ql/src/test/results/clientpositive/groupby4_noskew.q.out +++ b/ql/src/test/results/clientpositive/groupby4_noskew.q.out @@ -45,18 +45,14 @@ STAGE PLANS: mode: complete outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + File Output Operator + compressed: false Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 Stage: Stage-0 Move Operator diff --git a/ql/src/test/results/clientpositive/groupby6.q.out b/ql/src/test/results/clientpositive/groupby6.q.out index 21e5a3e..a042e43 100644 --- a/ql/src/test/results/clientpositive/groupby6.q.out +++ b/ql/src/test/results/clientpositive/groupby6.q.out @@ -64,18 +64,14 @@ STAGE PLANS: mode: final outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + File Output Operator + compressed: false Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 Stage: Stage-0 Move Operator diff --git a/ql/src/test/results/clientpositive/groupby6_map.q.out b/ql/src/test/results/clientpositive/groupby6_map.q.out index dd499f1..bcd127d 100644 --- a/ql/src/test/results/clientpositive/groupby6_map.q.out +++ b/ql/src/test/results/clientpositive/groupby6_map.q.out @@ -50,18 +50,14 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + File Output Operator + compressed: false Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 Stage: Stage-0 Move Operator diff --git a/ql/src/test/results/clientpositive/groupby6_map_skew.q.out b/ql/src/test/results/clientpositive/groupby6_map_skew.q.out index 078be70..cdeaffc 100644 --- a/ql/src/test/results/clientpositive/groupby6_map_skew.q.out +++ b/ql/src/test/results/clientpositive/groupby6_map_skew.q.out @@ -73,18 +73,14 @@ STAGE PLANS: mode: final outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + File Output Operator + compressed: false Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: 
false - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 Stage: Stage-0 Move Operator diff --git a/ql/src/test/results/clientpositive/groupby6_noskew.q.out b/ql/src/test/results/clientpositive/groupby6_noskew.q.out index 686f3c9..e19b420 100644 --- a/ql/src/test/results/clientpositive/groupby6_noskew.q.out +++ b/ql/src/test/results/clientpositive/groupby6_noskew.q.out @@ -45,18 +45,14 @@ STAGE PLANS: mode: complete outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + File Output Operator + compressed: false Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 Stage: Stage-0 Move Operator diff --git a/ql/src/test/results/clientpositive/groupby7_noskew_multi_single_reducer.q.out b/ql/src/test/results/clientpositive/groupby7_noskew_multi_single_reducer.q.out index fdcc138..bd852c8 100644 --- a/ql/src/test/results/clientpositive/groupby7_noskew_multi_single_reducer.q.out +++ b/ql/src/test/results/clientpositive/groupby7_noskew_multi_single_reducer.q.out @@ -63,38 +63,30 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: true - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), 
_col1 (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: true - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Map Reduce diff --git a/ql/src/test/results/clientpositive/groupby_complex_types.q.out b/ql/src/test/results/clientpositive/groupby_complex_types.q.out index 8bf5672..4b561a7 100644 --- a/ql/src/test/results/clientpositive/groupby_complex_types.q.out +++ b/ql/src/test/results/clientpositive/groupby_complex_types.q.out @@ -111,18 +111,14 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: array), _col1 (type: bigint) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 Stage: Stage-0 Move Operator @@ -154,18 +150,14 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: map), _col1 (type: bigint) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 Stage: Stage-1 Move Operator @@ -197,18 +189,14 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: struct), _col1 (type: bigint) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false 
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest3 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest3 Stage: Stage-2 Move Operator diff --git a/ql/src/test/results/clientpositive/groupby_complex_types_multi_single_reducer.q.out b/ql/src/test/results/clientpositive/groupby_complex_types_multi_single_reducer.q.out index 500fcd1..4611f60 100644 --- a/ql/src/test/results/clientpositive/groupby_complex_types_multi_single_reducer.q.out +++ b/ql/src/test/results/clientpositive/groupby_complex_types_multi_single_reducer.q.out @@ -84,19 +84,15 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: array), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Map Reduce @@ -153,19 +149,15 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: map), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-6 Map Reduce diff --git a/ql/src/test/results/clientpositive/groupby_grouping_sets4.q.out b/ql/src/test/results/clientpositive/groupby_grouping_sets4.q.out index 3bbd36f..d0617ff 100644 --- 
a/ql/src/test/results/clientpositive/groupby_grouping_sets4.q.out +++ b/ql/src/test/results/clientpositive/groupby_grouping_sets4.q.out @@ -54,22 +54,18 @@ STAGE PLANS: Filter Operator predicate: (a < 3) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: a (type: string), b (type: string) - outputColumnNames: a, b + Group By Operator + aggregations: count() + keys: a (type: string), b (type: string), '0' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count() - keys: a (type: string), b (type: string), '0' (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col3 (type: bigint) + value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -135,22 +131,18 @@ STAGE PLANS: Filter Operator predicate: (a < 3) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: a (type: string), b (type: string) - outputColumnNames: a, b + Group By Operator + aggregations: count() + keys: a (type: string), b (type: string), '0' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count() - keys: a (type: string), b (type: string), '0' (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col3 (type: bigint) + value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -240,22 +232,18 @@ STAGE PLANS: Filter Operator predicate: (a < 3) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: a (type: string), b (type: string) - outputColumnNames: a, b + Group By Operator + aggregations: count() + keys: a (type: string), b (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count() - keys: a (type: string), b (type: string) - mode: hash - 
outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col2 (type: bigint) + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -345,22 +333,18 @@ STAGE PLANS: Filter Operator predicate: (a < 3) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: a (type: string), b (type: string) - outputColumnNames: a, b + Group By Operator + aggregations: count() + keys: a (type: string), b (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count() - keys: a (type: string), b (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col2 (type: bigint) + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/groupby_multi_single_reducer2.q.out b/ql/src/test/results/clientpositive/groupby_multi_single_reducer2.q.out index df7c232..e8d6038 100644 --- a/ql/src/test/results/clientpositive/groupby_multi_single_reducer2.q.out +++ b/ql/src/test/results/clientpositive/groupby_multi_single_reducer2.q.out @@ -41,16 +41,12 @@ STAGE PLANS: Filter Operator predicate: ((substr(key, 1, 1) >= 5) or (substr(key, 1, 1) < 5)) (type: boolean) Statistics: Num rows: 332 Data size: 3527 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Reduce Output Operator + key expressions: substr(key, 1, 1) (type: string), key (type: string) + sort order: ++ + Map-reduce partition columns: substr(key, 1, 1) (type: string) Statistics: Num rows: 332 Data size: 3527 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: substr(key, 1, 1) (type: string), key (type: string) - sort order: ++ - Map-reduce partition columns: substr(key, 1, 1) (type: string) - Statistics: Num rows: 332 Data size: 3527 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + value expressions: value (type: string) Reduce Operator Tree: Forward Statistics: Num rows: 332 Data size: 3527 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/groupby_multi_single_reducer3.q.out b/ql/src/test/results/clientpositive/groupby_multi_single_reducer3.q.out index 031b79e..fb15fef 100644 --- 
a/ql/src/test/results/clientpositive/groupby_multi_single_reducer3.q.out +++ b/ql/src/test/results/clientpositive/groupby_multi_single_reducer3.q.out @@ -55,16 +55,12 @@ STAGE PLANS: Filter Operator predicate: (((value) IN ('val_100', 'val_200', 'val_300') and (key) IN (100, 150, 200)) or ((value) IN ('val_400', 'val_500') and (key) IN (400, 450))) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + value expressions: value (type: string) Reduce Operator Tree: Forward Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE @@ -225,16 +221,12 @@ STAGE PLANS: Filter Operator predicate: (((((key + key) = 200) or ((key - 100) = 100)) or ((key = 300) and value is not null)) or (((key + key) = 400) or (((key - 100) = 500) and value is not null))) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: string) + value expressions: key (type: string) Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -395,16 +387,12 @@ STAGE PLANS: Filter Operator predicate: (((value) IN ('val_100', 'val_200', 'val_300') and (key) IN (100, 150, 200)) or ((value) IN ('val_400', 'val_500') and (key) IN (400, 450))) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + value expressions: value (type: string) Reduce Operator Tree: Forward Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE @@ -565,16 +553,12 @@ STAGE PLANS: Filter Operator predicate: (((((key + key) = 200) or ((key - 100) = 100)) or ((key = 300) and value is not null)) or (((key + key) = 400) or (((key - 100) = 500) and value is not null))) (type: boolean) Statistics: Num rows: 500 Data size: 5312 
Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: string) + value expressions: key (type: string) Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/groupby_position.q.out b/ql/src/test/results/clientpositive/groupby_position.q.out index 70eb661..375f2f3 100644 --- a/ql/src/test/results/clientpositive/groupby_position.q.out +++ b/ql/src/test/results/clientpositive/groupby_position.q.out @@ -46,40 +46,32 @@ STAGE PLANS: Filter Operator predicate: (key < 20) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Group By Operator + aggregations: count(DISTINCT substr(value, 5)) + keys: key (type: string), substr(value, 5) (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(DISTINCT substr(value, 5)) - keys: key (type: string), substr(value, 5) (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key < 20) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Group By Operator + aggregations: count(DISTINCT substr(value, 5)) + keys: key (type: string), value (type: string), substr(value, 5) (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(DISTINCT substr(value, 5)) - keys: key (type: string), value (type: string), substr(value, 5) (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0) @@ -244,21 +236,17 @@ STAGE PLANS: Filter Operator predicate: (key < 20) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Group By Operator + aggregations: count(DISTINCT substr(value, 5)) + keys: key (type: string), substr(value, 5) (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(DISTINCT substr(value, 5)) - keys: key (type: string), substr(value, 5) (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key < 20) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE @@ -443,39 +431,31 @@ STAGE PLANS: Filter Operator predicate: (key <= 20) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Group By Operator + aggregations: count(1) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output 
format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -484,15 +464,15 @@ STAGE PLANS: Reduce Output Operator key expressions: _col1 (type: bigint), _col0 (type: string) sort order: -+ - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -579,21 +559,17 @@ STAGE PLANS: Filter Operator predicate: (((key > 10) and (key < 20)) and key is not null) (type: boolean) Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Group By Operator + aggregations: count(DISTINCT substr(value, 5)) + keys: key (type: string), value (type: string), substr(value, 5) (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(DISTINCT substr(value, 5)) - keys: key (type: string), value (type: string), substr(value, 5) (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col2:0._col0) @@ -627,7 +603,7 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator @@ -637,11 +613,11 @@ STAGE PLANS: 0 {KEY.reducesinkkey0} {VALUE._col0} 1 {KEY.reducesinkkey0} {VALUE._col0} outputColumnNames: _col0, _col1, _col3, _col4 - Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 326 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE + 
Statistics: Num rows: 30 Data size: 326 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -656,15 +632,15 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) sort order: --++ - Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 326 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 326 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 326 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -679,36 +655,28 @@ STAGE PLANS: Filter Operator predicate: (((key > 15) and (key < 25)) and key is not null) (type: boolean) Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Group By Operator + keys: key (type: string), value (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: key (type: string), value (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/groupby_ppd.q.out b/ql/src/test/results/clientpositive/groupby_ppd.q.out index 
8b8a96e..886a0c8 100644 --- a/ql/src/test/results/clientpositive/groupby_ppd.q.out +++ b/ql/src/test/results/clientpositive/groupby_ppd.q.out @@ -21,7 +21,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: d + alias: c Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator predicate: (bar = 1) (type: boolean) @@ -47,7 +47,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE TableScan - alias: c + alias: d Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator predicate: (bar = 1) (type: boolean) diff --git a/ql/src/test/results/clientpositive/groupby_resolution.q.out b/ql/src/test/results/clientpositive/groupby_resolution.q.out index 1d1856f..53af591 100644 --- a/ql/src/test/results/clientpositive/groupby_resolution.q.out +++ b/ql/src/test/results/clientpositive/groupby_resolution.q.out @@ -639,22 +639,18 @@ STAGE PLANS: Filter Operator predicate: (key < '12') (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Group By Operator + aggregations: count() + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/groupby_sort_1_23.q.out b/ql/src/test/results/clientpositive/groupby_sort_1_23.q.out index a74cb2b..b199bf9 100644 --- a/ql/src/test/results/clientpositive/groupby_sort_1_23.q.out +++ b/ql/src/test/results/clientpositive/groupby_sort_1_23.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) +PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) -- SORT_QUERY_RESULTS CREATE TABLE T1(key STRING, val STRING) @@ -6,7 +6,7 @@ CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@T1 -POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) +POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) -- SORT_QUERY_RESULTS CREATE TABLE T1(key STRING, val STRING) @@ -2350,24 +2350,20 @@ STAGE PLANS: mode: final outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) + Group By Operator + aggregations: sum(_col1) + keys: (_col0 + _col0) (type: double) + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col1) - keys: (_col0 + _col0) (type: double) - mode: hash - 
outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col1 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2628,47 +2624,43 @@ STAGE PLANS: mode: final outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Union + Union + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int + File Output Operator + compressed: false + GlobalTableId: 1 #### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 5 - rawDataSize 17 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 22 + NumFilesPerFileSink: 1 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,cnt + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.outputtbl1 + numFiles 1 + numRows 5 + rawDataSize 17 + serialization.ddl struct outputtbl1 { i32 key, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 22 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false TableScan alias: t1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE @@ -2683,47 +2675,43 @@ STAGE PLANS: mode: 
final outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Union + Union + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int + File Output Operator + compressed: false + GlobalTableId: 1 #### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 5 - rawDataSize 17 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 22 + NumFilesPerFileSink: 1 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,cnt + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.outputtbl1 + numFiles 1 + numRows 5 + rawDataSize 17 + serialization.ddl struct outputtbl1 { i32 key, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 22 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3210,27 +3198,23 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: double), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types double,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types double,bigint + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Map Reduce @@ -3774,28 +3758,20 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Group By Operator + aggregations: count(1) + keys: key (type: string) + mode: final + outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col1 (type: bigint) - auto parallelism: false + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: bigint) + auto parallelism: false TableScan alias: t1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE @@ -3804,28 +3780,20 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Group By Operator + aggregations: count(1) + keys: key (type: string) + mode: final + outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: _col1 (type: bigint) - auto parallelism: false + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 12 Basic 
stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3888,17 +3856,17 @@ STAGE PLANS: 0 {KEY.reducesinkkey0} {VALUE._col0} 1 {VALUE._col0} outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), UDFToInteger((_col1 + _col3)) (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -4094,24 +4062,20 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), val (type: string) - outputColumnNames: key, val + Group By Operator + aggregations: count(1) + keys: key (type: string), val (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string), val (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col2 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col2 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4172,28 +4136,24 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types string,string,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - 
MultiFileSpray: false + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types string,string,bigint + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-1 Map Reduce @@ -4206,35 +4166,27 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Group By Operator + aggregations: count(1) + keys: key (type: string) + mode: final + outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col1 (type: bigint) - auto parallelism: false + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: bigint) + auto parallelism: false TableScan GatherStats: false Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE tag: 1 value expressions: _col1 (type: string), _col2 (type: bigint) auto parallelism: false @@ -4321,17 +4273,17 @@ STAGE PLANS: 0 {KEY.reducesinkkey0} {VALUE._col0} 1 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string), _col4 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: 
input format: org.apache.hadoop.mapred.TextInputFormat diff --git a/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out b/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out index df9d51e..3e68cac 100644 --- a/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out +++ b/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) +PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) -- SORT_QUERY_RESULTS CREATE TABLE T1(key STRING, val STRING) @@ -6,7 +6,7 @@ CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@T1 -POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) +POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) -- SORT_QUERY_RESULTS CREATE TABLE T1(key STRING, val STRING) @@ -2546,24 +2546,20 @@ STAGE PLANS: mode: final outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) + Group By Operator + aggregations: sum(_col1) + keys: (_col0 + _col0) (type: double) + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col1) - keys: (_col0 + _col0) (type: double) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col1 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2888,47 +2884,43 @@ STAGE PLANS: mode: final outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Union + Union + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int + File Output 
Operator + compressed: false + GlobalTableId: 1 #### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 5 - rawDataSize 17 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 22 + NumFilesPerFileSink: 1 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,cnt + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.outputtbl1 + numFiles 1 + numRows 5 + rawDataSize 17 + serialization.ddl struct outputtbl1 { i32 key, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 22 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false TableScan alias: t1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE @@ -2943,47 +2935,43 @@ STAGE PLANS: mode: final outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Union + Union + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 5 - rawDataSize 17 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 22 + NumFilesPerFileSink: 1 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,cnt + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.outputtbl1 + numFiles 1 + numRows 5 + rawDataSize 17 + serialization.ddl struct outputtbl1 { i32 key, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 22 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3535,27 +3523,23 @@ STAGE PLANS: mode: final outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: double), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types double,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types double,bigint + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Map Reduce @@ -4099,28 +4083,20 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Group By Operator + aggregations: count(1) + keys: key (type: string) + mode: final + outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col1 (type: bigint) - auto parallelism: false + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: 
_col1 (type: bigint) + auto parallelism: false TableScan alias: t1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE @@ -4129,28 +4105,20 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Group By Operator + aggregations: count(1) + keys: key (type: string) + mode: final + outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: _col1 (type: bigint) - auto parallelism: false + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4213,17 +4181,17 @@ STAGE PLANS: 0 {KEY.reducesinkkey0} {VALUE._col0} 1 {VALUE._col0} outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), UDFToInteger((_col1 + _col3)) (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -4420,24 +4388,20 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), val (type: string) - outputColumnNames: key, val + Group By Operator + aggregations: count(1) + keys: key (type: string), val (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string), val (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: 
string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col2 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col2 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4562,28 +4526,24 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: final outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types string,string,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types string,string,bigint + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-1 Map Reduce @@ -4596,35 +4556,27 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Group By Operator + aggregations: count(1) + keys: key (type: string) + mode: final + outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col1 (type: bigint) - auto parallelism: false + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: bigint) + auto parallelism: false TableScan GatherStats: false Reduce Output Operator key 
expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE tag: 1 value expressions: _col1 (type: string), _col2 (type: bigint) auto parallelism: false @@ -4711,17 +4663,17 @@ STAGE PLANS: 0 {KEY.reducesinkkey0} {VALUE._col0} 1 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string), _col4 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git a/ql/src/test/results/clientpositive/having.q.out b/ql/src/test/results/clientpositive/having.q.out index 4cda72f..4dd52ff 100644 --- a/ql/src/test/results/clientpositive/having.q.out +++ b/ql/src/test/results/clientpositive/having.q.out @@ -95,22 +95,18 @@ STAGE PLANS: Filter Operator predicate: (key <> 302) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Group By Operator + aggregations: max(value) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: max(value) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + value expressions: _col1 (type: string) Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0) @@ -736,22 +732,18 @@ STAGE PLANS: Filter Operator predicate: (key > 300) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Group By Operator + aggregations: max(value) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: max(value) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + 
Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + value expressions: _col1 (type: string) Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/index_auto_mult_tables.q.out b/ql/src/test/results/clientpositive/index_auto_mult_tables.q.out index 47fed0c..edf356a 100644 --- a/ql/src/test/results/clientpositive/index_auto_mult_tables.q.out +++ b/ql/src/test/results/clientpositive/index_auto_mult_tables.q.out @@ -19,17 +19,6 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((((key > 70) and (key < 90)) and (key > 80)) and (key < 100)) (type: boolean) - Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE - TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -41,6 +30,17 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) + TableScan + alias: b + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((key > 70) and (key < 90)) and (key > 80)) and (key < 100)) (type: boolean) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -203,10 +203,10 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: default.default__srcpart_srcpart_index__ - filterExpr: (((((key > 70) and (key < 90)) and (key > 80)) and (key < 100)) and (not EWAH_BITMAP_EMPTY(_bitmaps))) (type: boolean) + alias: default.default__src_src_index__ + filterExpr: (((((key > 80) and (key < 100)) and (key > 70)) and (key < 90)) and (not EWAH_BITMAP_EMPTY(_bitmaps))) (type: boolean) Filter Operator - predicate: (((((key > 70) and (key < 90)) and (key > 80)) and (key < 100)) and (not EWAH_BITMAP_EMPTY(_bitmaps))) (type: boolean) + predicate: (((((key > 80) and (key < 100)) and (key > 70)) and (key < 90)) and (not EWAH_BITMAP_EMPTY(_bitmaps))) (type: boolean) Select Operator expressions: _bucketname (type: string), _offset (type: bigint) outputColumnNames: _col0, _col1 @@ -246,18 +246,6 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b - filterExpr: ((((key > 70) and (key < 90)) and (key > 80)) and (key < 100)) (type: boolean) - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((((key > 70) and (key < 90)) and (key > 80)) and (key < 100)) (type: boolean) - Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column 
stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE - TableScan alias: a filterExpr: ((((key > 80) and (key < 100)) and (key > 70)) and (key < 90)) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -270,6 +258,18 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) + TableScan + alias: b + filterExpr: ((((key > 70) and (key < 90)) and (key > 80)) and (key < 100)) (type: boolean) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((key > 70) and (key < 90)) and (key > 80)) and (key < 100)) (type: boolean) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -295,10 +295,10 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: default.default__src_src_index__ - filterExpr: (((((key > 80) and (key < 100)) and (key > 70)) and (key < 90)) and (not EWAH_BITMAP_EMPTY(_bitmaps))) (type: boolean) + alias: default.default__srcpart_srcpart_index__ + filterExpr: (((((key > 70) and (key < 90)) and (key > 80)) and (key < 100)) and (not EWAH_BITMAP_EMPTY(_bitmaps))) (type: boolean) Filter Operator - predicate: (((((key > 80) and (key < 100)) and (key > 70)) and (key < 90)) and (not EWAH_BITMAP_EMPTY(_bitmaps))) (type: boolean) + predicate: (((((key > 70) and (key < 90)) and (key > 80)) and (key < 100)) and (not EWAH_BITMAP_EMPTY(_bitmaps))) (type: boolean) Select Operator expressions: _bucketname (type: string), _offset (type: bigint) outputColumnNames: _col0, _col1 diff --git a/ql/src/test/results/clientpositive/index_auto_mult_tables_compact.q.out b/ql/src/test/results/clientpositive/index_auto_mult_tables_compact.q.out index 1e344fa..8f03299 100644 --- a/ql/src/test/results/clientpositive/index_auto_mult_tables_compact.q.out +++ b/ql/src/test/results/clientpositive/index_auto_mult_tables_compact.q.out @@ -19,17 +19,6 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((((key > 70) and (key < 90)) and (key > 80)) and (key < 100)) (type: boolean) - Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE - TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -41,6 +30,17 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) + TableScan + alias: b + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((key > 70) and (key < 90)) and (key > 80)) and (key < 100)) 
(type: boolean) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -213,10 +213,10 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: default.default__srcpart_srcpart_index__ - filterExpr: ((((key > 70) and (key < 90)) and (key > 80)) and (key < 100)) (type: boolean) + alias: default.default__src_src_index__ + filterExpr: ((((key > 80) and (key < 100)) and (key > 70)) and (key < 90)) (type: boolean) Filter Operator - predicate: ((((key > 70) and (key < 90)) and (key > 80)) and (key < 100)) (type: boolean) + predicate: ((((key > 80) and (key < 100)) and (key > 70)) and (key < 90)) (type: boolean) Select Operator expressions: _bucketname (type: string), _offsets (type: array) outputColumnNames: _col0, _col1 @@ -246,18 +246,6 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b - filterExpr: ((((key > 70) and (key < 90)) and (key > 80)) and (key < 100)) (type: boolean) - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((((key > 70) and (key < 90)) and (key > 80)) and (key < 100)) (type: boolean) - Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE - TableScan alias: a filterExpr: ((((key > 80) and (key < 100)) and (key > 70)) and (key < 90)) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -270,6 +258,18 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) + TableScan + alias: b + filterExpr: ((((key > 70) and (key < 90)) and (key > 80)) and (key < 100)) (type: boolean) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((key > 70) and (key < 90)) and (key > 80)) and (key < 100)) (type: boolean) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -323,10 +323,10 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: default.default__src_src_index__ - filterExpr: ((((key > 80) and (key < 100)) and (key > 70)) and (key < 90)) (type: boolean) + alias: default.default__srcpart_srcpart_index__ + filterExpr: ((((key > 70) and (key < 90)) and (key > 80)) and (key < 100)) (type: boolean) Filter Operator - predicate: ((((key > 80) and (key < 100)) and (key > 70)) and (key < 90)) (type: boolean) + predicate: ((((key > 70) and (key < 90)) and (key > 80)) and (key < 100)) (type: boolean) Select Operator expressions: _bucketname (type: string), _offsets (type: array) outputColumnNames: _col0, _col1 diff --git a/ql/src/test/results/clientpositive/index_auto_self_join.q.out 
b/ql/src/test/results/clientpositive/index_auto_self_join.q.out index 2bc80f4..1b6846a 100644 --- a/ql/src/test/results/clientpositive/index_auto_self_join.q.out +++ b/ql/src/test/results/clientpositive/index_auto_self_join.q.out @@ -17,10 +17,10 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((value is not null and (key > 70)) and (key < 90)) (type: boolean) + predicate: ((value is not null and (key > 80)) and (key < 100)) (type: boolean) Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: value (type: string) @@ -29,10 +29,10 @@ STAGE PLANS: Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: string) TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((value is not null and (key > 80)) and (key < 100)) (type: boolean) + predicate: ((value is not null and (key > 70)) and (key < 90)) (type: boolean) Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: value (type: string) @@ -124,9 +124,9 @@ STAGE PLANS: Map Operator Tree: TableScan alias: default.default__src_src_index__ - filterExpr: (((key > 70) and (key < 90)) and (not EWAH_BITMAP_EMPTY(_bitmaps))) (type: boolean) + filterExpr: (((key > 80) and (key < 100)) and (not EWAH_BITMAP_EMPTY(_bitmaps))) (type: boolean) Filter Operator - predicate: (((key > 70) and (key < 90)) and (not EWAH_BITMAP_EMPTY(_bitmaps))) (type: boolean) + predicate: (((key > 80) and (key < 100)) and (not EWAH_BITMAP_EMPTY(_bitmaps))) (type: boolean) Select Operator expressions: _bucketname (type: string), _offset (type: bigint) outputColumnNames: _col0, _col1 @@ -166,11 +166,11 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b - filterExpr: ((value is not null and (key > 70)) and (key < 90)) (type: boolean) + alias: a + filterExpr: ((value is not null and (key > 80)) and (key < 100)) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((value is not null and (key > 70)) and (key < 90)) (type: boolean) + predicate: ((value is not null and (key > 80)) and (key < 100)) (type: boolean) Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: value (type: string) @@ -179,11 +179,11 @@ STAGE PLANS: Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: string) TableScan - alias: a - filterExpr: ((value is not null and (key > 80)) and (key < 100)) (type: boolean) + alias: b + filterExpr: ((value is not null and (key > 70)) and (key < 90)) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((value is not null and (key > 80)) and (key < 100)) (type: boolean) + predicate: ((value is not null and (key > 70)) and (key < 90)) (type: boolean) Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: value (type: string) @@ -217,9 +217,9 @@ STAGE PLANS: Map Operator Tree: TableScan alias: default.default__src_src_index__ - filterExpr: (((key > 80) and (key < 100)) and (not EWAH_BITMAP_EMPTY(_bitmaps))) (type: 
boolean) + filterExpr: (((key > 70) and (key < 90)) and (not EWAH_BITMAP_EMPTY(_bitmaps))) (type: boolean) Filter Operator - predicate: (((key > 80) and (key < 100)) and (not EWAH_BITMAP_EMPTY(_bitmaps))) (type: boolean) + predicate: (((key > 70) and (key < 90)) and (not EWAH_BITMAP_EMPTY(_bitmaps))) (type: boolean) Select Operator expressions: _bucketname (type: string), _offset (type: bigint) outputColumnNames: _col0, _col1 diff --git a/ql/src/test/results/clientpositive/index_auto_update.q.out b/ql/src/test/results/clientpositive/index_auto_update.q.out index 0f996b5..3c0320d 100644 --- a/ql/src/test/results/clientpositive/index_auto_update.q.out +++ b/ql/src/test/results/clientpositive/index_auto_update.q.out @@ -126,16 +126,13 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: array) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.default__temp_temp_index__ + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.default__temp_temp_index__ Stage: Stage-1 Move Operator diff --git a/ql/src/test/results/clientpositive/infer_bucket_sort.q.out b/ql/src/test/results/clientpositive/infer_bucket_sort.q.out index e513626..303929a 100644 --- a/ql/src/test/results/clientpositive/infer_bucket_sort.q.out +++ b/ql/src/test/results/clientpositive/infer_bucket_sort.q.out @@ -354,7 +354,7 @@ SELECT a.key, c.value FROM src a JOIN src b ON (a.key = b.key) JOIN src c ON (b. 
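The golden-file updates in this patch repeat a few mechanical patterns: identity Select Operators (projections that only forward their input columns unchanged) disappear from the operator trees, the TableScans feeding a join are emitted in a fixed alias order (a before b, src1 before src2), and stage numbering, Statistics estimates, and POSTHOOK lineage entries shift to match the replanned trees. As a hedged illustration only (nothing below is taken from this patch; the table is the stock q-test src(key STRING, value STRING)), the kind of query whose plan loses such an operator looks like:

    -- Illustrative sketch, assuming the standard Hive q-test table
    -- src(key STRING, value STRING); not a query from this patch.
    EXPLAIN
    SELECT key, count(key)
    FROM src
    WHERE key IS NOT NULL
    GROUP BY key;
    -- Old-style plans carried Group By Operator -> Select Operator
    -- (an identity projection over _col0, _col1) -> File Output
    -- Operator. The updated plans feed the Group By output straight
    -- into the File Output Operator, so the identity Select and its
    -- attached Statistics line vanish, which is exactly the shape of
    -- the .q.out hunks above.

The POSTHOOK Lineage flip from SIMPLE to EXPRESSION in the hunk that follows is the same replanning seen from the lineage hook's side: a column that previously reached the sink as a verbatim copy of (src)a.key is now tracked through the rewritten operator tree as a computed expression.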
POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@test_table@part=1 -POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)a.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') PREHOOK: type: DESCTABLE diff --git a/ql/src/test/results/clientpositive/infer_bucket_sort_map_operators.q.out b/ql/src/test/results/clientpositive/infer_bucket_sort_map_operators.q.out index 35c4167..c4c7c7a 100644 --- a/ql/src/test/results/clientpositive/infer_bucket_sort_map_operators.q.out +++ b/ql/src/test/results/clientpositive/infer_bucket_sort_map_operators.q.out @@ -89,18 +89,14 @@ STAGE PLANS: mode: final outputColumnNames: _col0, _col1 Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table_out + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table_out Stage: Stage-7 Conditional Operator @@ -252,16 +248,12 @@ STAGE PLANS: Filter Operator predicate: UDFToDouble(_col1) is not null (type: boolean) Statistics: Num rows: 15 Data size: 1503 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: UDFToDouble(_col1) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col1) (type: double) Statistics: Num rows: 15 Data size: 1503 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(_col1) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(_col1) (type: double) - Statistics: Num rows: 15 Data size: 1503 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: bigint) + value expressions: _col0 (type: string), _col1 (type: bigint) TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -286,18 +278,14 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table_out + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table_out Stage: Stage-0 Move Operator @@ -389,13 +377,13 @@ SELECT /*+ MAPJOIN(a) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 + Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 Stage-5 - Stage-6 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-3 depends on stages: Stage-0 + Stage-4 + Stage-6 + Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-1 @@ -428,10 +416,10 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table_out - Stage: Stage-7 + Stage: Stage-8 Conditional Operator - Stage: Stage-4 + Stage: Stage-5 Move Operator files: hdfs directory: true @@ -449,10 +437,10 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table_out - Stage: Stage-2 + Stage: Stage-3 Stats-Aggr Operator - Stage: Stage-3 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -464,7 +452,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table_out - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -476,7 +464,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table_out - Stage: Stage-6 + Stage: Stage-7 Move Operator files: hdfs directory: true @@ -550,7 +538,7 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 @@ -572,35 +560,29 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) outputColumnNames: _col6 - Select Operator - expressions: _col6 (type: string) - outputColumnNames: _col6 - Group By Operator - aggregations: count() - keys: _col6 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - value expressions: _col1 (type: bigint) + Group By Operator + aggregations: count() + keys: _col6 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table_out + File Output Operator + compressed: false + table: + 
input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table_out Stage: Stage-0 Move Operator @@ -614,7 +596,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table_out - Stage: Stage-2 + Stage: Stage-3 Stats-Aggr Operator PREHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') diff --git a/ql/src/test/results/clientpositive/infer_bucket_sort_reducers_power_two.q.out b/ql/src/test/results/clientpositive/infer_bucket_sort_reducers_power_two.q.out index 2a0336e..acc4bcf 100644 --- a/ql/src/test/results/clientpositive/infer_bucket_sort_reducers_power_two.q.out +++ b/ql/src/test/results/clientpositive/infer_bucket_sort_reducers_power_two.q.out @@ -246,7 +246,7 @@ SELECT a.key, c.value FROM src a JOIN src b ON (a.key = b.key) JOIN src c ON (b. POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@test_table@part=1 -POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)a.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)c.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') PREHOOK: type: DESCTABLE diff --git a/ql/src/test/results/clientpositive/innerjoin.q.out b/ql/src/test/results/clientpositive/innerjoin.q.out index f058d37..34d6fde 100644 --- a/ql/src/test/results/clientpositive/innerjoin.q.out +++ b/ql/src/test/results/clientpositive/innerjoin.q.out @@ -28,7 +28,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -38,9 +38,8 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) TableScan - alias: src1 + alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -50,6 +49,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1184,15 +1184,6 @@ STAGE PLANS: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 @@ -1203,6 +1194,15 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) + TableScan + alias: src + 
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Reduce Operator Tree: Join Operator condition map: diff --git a/ql/src/test/results/clientpositive/input12.q.out b/ql/src/test/results/clientpositive/input12.q.out index 20985eb..1557c58 100644 --- a/ql/src/test/results/clientpositive/input12.q.out +++ b/ql/src/test/results/clientpositive/input12.q.out @@ -1,10 +1,10 @@ -PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@dest1 -POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE diff --git a/ql/src/test/results/clientpositive/input30.q.out b/ql/src/test/results/clientpositive/input30.q.out index ae89ccb..cda0df6 100644 --- a/ql/src/test/results/clientpositive/input30.q.out +++ b/ql/src/test/results/clientpositive/input30.q.out @@ -37,17 +37,15 @@ STAGE PLANS: Filter Operator predicate: (((hash(rand(460476415)) & 2147483647) % 32) = 0) (type: boolean) Statistics: Num rows: 250 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - Statistics: Num rows: 250 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/input39.q.out b/ql/src/test/results/clientpositive/input39.q.out index 6868da7..0afa77b 100644 --- a/ql/src/test/results/clientpositive/input39.q.out +++ b/ql/src/test/results/clientpositive/input39.q.out @@ -1,11 +1,11 @@ -PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) create table t1(key string, value string) partitioned by (ds string) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@t1 -POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) create table t1(key string, value string) partitioned by (ds string) @@ -72,7 +72,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: t2 + alias: t1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((((hash(rand(460476415)) & 2147483647) % 32) = 0) and key is not null) (type: boolean) @@ -83,7 +83,7 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column 
stats: NONE TableScan - alias: t1 + alias: t2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((((hash(rand(460476415)) & 2147483647) % 32) = 0) and key is not null) (type: boolean) @@ -101,19 +101,17 @@ STAGE PLANS: 0 1 Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce diff --git a/ql/src/test/results/clientpositive/input9.q.out b/ql/src/test/results/clientpositive/input9.q.out index ec1bcab..4666787 100644 --- a/ql/src/test/results/clientpositive/input9.q.out +++ b/ql/src/test/results/clientpositive/input9.q.out @@ -35,7 +35,7 @@ STAGE PLANS: predicate: (null = null) (type: boolean) Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: null (type: string), UDFToInteger(key) (type: int) + expressions: null (type: void), UDFToInteger(key) (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/join0.q.out b/ql/src/test/results/clientpositive/join0.q.out index 2461e40..a8301a5 100644 --- a/ql/src/test/results/clientpositive/join0.q.out +++ b/ql/src/test/results/clientpositive/join0.q.out @@ -61,16 +61,12 @@ STAGE PLANS: 1 {VALUE._col0} {VALUE._col1} outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -116,7 +112,7 @@ SELECT src1.key as k1, src1.value as v1, (SELECT * FROM src WHERE src.key < 10) src2 SORT BY k1, v1, k2, v2 POSTHOOK: type: QUERY -{"STAGE PLANS":{"Stage-2":{"Map Reduce":{"Reduce Operator Tree:":{"Select Operator":{"expressions:":"KEY.reducesinkkey0 (type: string), 
KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string)","outputColumnNames:":["_col0","_col1","_col2","_col3"],"children":{"File Output Operator":{"Statistics:":"Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE","compressed:":"false","table:":{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}}},"Statistics:":"Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE"}},"Map Operator Tree:":[{"TableScan":{"children":{"Reduce Output Operator":{"sort order:":"++++","Statistics:":"Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE","key expressions:":"_col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)"}}}}]}},"Stage-1":{"Map Reduce":{"Reduce Operator Tree:":{"Join Operator":{"outputColumnNames:":["_col0","_col1","_col2","_col3"],"children":{"Select Operator":{"expressions:":"_col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)","outputColumnNames:":["_col0","_col1","_col2","_col3"],"children":{"File Output Operator":{"compressed:":"false","table:":{"serde:":"org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe","input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat"}}},"Statistics:":"Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE"}},"Statistics:":"Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE","condition map:":[{"":"Inner Join 0 to 1"}],"condition expressions:":{"1":"{VALUE._col0} {VALUE._col1}","0":"{VALUE._col0} {VALUE._col1}"}}},"Map Operator Tree:":[{"TableScan":{"alias:":"src","children":{"Filter Operator":{"predicate:":"(key < 10) (type: boolean)","children":{"Select Operator":{"expressions:":"key (type: string), value (type: string)","outputColumnNames:":["_col0","_col1"],"children":{"Reduce Output Operator":{"sort order:":"","value expressions:":"_col0 (type: string), _col1 (type: string)","Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE"}},"Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE"}},"Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE"}},"Statistics:":"Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE"}},{"TableScan":{"alias:":"src","children":{"Filter Operator":{"predicate:":"(key < 10) (type: boolean)","children":{"Select Operator":{"expressions:":"key (type: string), value (type: string)","outputColumnNames:":["_col0","_col1"],"children":{"Reduce Output Operator":{"sort order:":"","value expressions:":"_col0 (type: string), _col1 (type: string)","Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE"}},"Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE"}},"Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE"}},"Statistics:":"Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE"}}]}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{}}}}},"STAGE DEPENDENCIES":{"Stage-2":{"DEPENDENT STAGES":"Stage-1"},"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-0":{"DEPENDENT STAGES":"Stage-2"}}} +{"STAGE PLANS":{"Stage-2":{"Map Reduce":{"Reduce Operator Tree:":{"Select 
Operator":{"expressions:":"KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string)","outputColumnNames:":["_col0","_col1","_col2","_col3"],"children":{"File Output Operator":{"Statistics:":"Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE","compressed:":"false","table:":{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}}},"Statistics:":"Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE"}},"Map Operator Tree:":[{"TableScan":{"children":{"Reduce Output Operator":{"sort order:":"++++","Statistics:":"Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE","key expressions:":"_col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)"}}}}]}},"Stage-1":{"Map Reduce":{"Reduce Operator Tree:":{"Join Operator":{"outputColumnNames:":["_col0","_col1","_col2","_col3"],"children":{"File Output Operator":{"compressed:":"false","table:":{"serde:":"org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe","input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat"}}},"Statistics:":"Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE","condition map:":[{"":"Inner Join 0 to 1"}],"condition expressions:":{"1":"{VALUE._col0} {VALUE._col1}","0":"{VALUE._col0} {VALUE._col1}"}}},"Map Operator Tree:":[{"TableScan":{"alias:":"src","children":{"Filter Operator":{"predicate:":"(key < 10) (type: boolean)","children":{"Select Operator":{"expressions:":"key (type: string), value (type: string)","outputColumnNames:":["_col0","_col1"],"children":{"Reduce Output Operator":{"sort order:":"","value expressions:":"_col0 (type: string), _col1 (type: string)","Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE"}},"Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE"}},"Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE"}},"Statistics:":"Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE"}},{"TableScan":{"alias:":"src","children":{"Filter Operator":{"predicate:":"(key < 10) (type: boolean)","children":{"Select Operator":{"expressions:":"key (type: string), value (type: string)","outputColumnNames:":["_col0","_col1"],"children":{"Reduce Output Operator":{"sort order:":"","value expressions:":"_col0 (type: string), _col1 (type: string)","Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE"}},"Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE"}},"Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE"}},"Statistics:":"Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE"}}]}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{}}}}},"STAGE DEPENDENCIES":{"Stage-2":{"DEPENDENT STAGES":"Stage-1"},"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-0":{"DEPENDENT STAGES":"Stage-2"}}} Warning: Shuffle Join JOIN[8][tables = [src1, src2]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 FROM diff --git a/ql/src/test/results/clientpositive/join1.q.out 
b/ql/src/test/results/clientpositive/join1.q.out index 16b263c..a346a01 100644 --- a/ql/src/test/results/clientpositive/join1.q.out +++ b/ql/src/test/results/clientpositive/join1.q.out @@ -28,7 +28,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -38,9 +38,8 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) TableScan - alias: src1 + alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -50,6 +49,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Reduce Operator Tree: Join Operator condition map: diff --git a/ql/src/test/results/clientpositive/join11.q.out b/ql/src/test/results/clientpositive/join11.q.out index d08b795..d0f9726 100644 --- a/ql/src/test/results/clientpositive/join11.q.out +++ b/ql/src/test/results/clientpositive/join11.q.out @@ -33,15 +33,14 @@ STAGE PLANS: predicate: (key < 100) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -49,14 +48,15 @@ STAGE PLANS: predicate: (key < 100) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: diff --git a/ql/src/test/results/clientpositive/join12.q.out b/ql/src/test/results/clientpositive/join12.q.out index e103001..29bbd97 100644 --- a/ql/src/test/results/clientpositive/join12.q.out +++ b/ql/src/test/results/clientpositive/join12.q.out @@ -36,33 +36,33 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((key < 100) and (key < 80)) and key is not null) (type: boolean) - Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + predicate: ((key < 80) and (key < 100)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: 
string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key < 80) and (key < 100)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + predicate: (((key < 100) and (key < 80)) and key is not null) (type: boolean) + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/join13.q.out b/ql/src/test/results/clientpositive/join13.q.out index 76349b5..819d40f 100644 --- a/ql/src/test/results/clientpositive/join13.q.out +++ b/ql/src/test/results/clientpositive/join13.q.out @@ -40,15 +40,14 @@ STAGE PLANS: predicate: (key < 100) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -56,14 +55,15 @@ STAGE PLANS: predicate: (key < 100) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: diff --git 
a/ql/src/test/results/clientpositive/join14.q.out b/ql/src/test/results/clientpositive/join14.q.out index 50803f3..c363979 100644 --- a/ql/src/test/results/clientpositive/join14.q.out +++ b/ql/src/test/results/clientpositive/join14.q.out @@ -1,12 +1,12 @@ -PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) --- SORT_QUERY_RESULTS +PREHOOK: query: -- SORT_QUERY_RESULTS +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@dest1 -POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) --- SORT_QUERY_RESULTS +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE @@ -30,28 +30,28 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: srcpart - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > 100) and key is not null) (type: boolean) - Statistics: Num rows: 167 Data size: 1774 Basic stats: COMPLETE Column stats: NONE + predicate: (key > 100) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 167 Data size: 1774 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: srcpart + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key > 100) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + predicate: ((key > 100) and key is not null) (type: boolean) + Statistics: Num rows: 167 Data size: 1774 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 167 Data size: 1774 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Reduce Operator Tree: Join Operator condition map: diff --git a/ql/src/test/results/clientpositive/join15.q.out b/ql/src/test/results/clientpositive/join15.q.out index 00107f8..f5cd4c4 100644 --- a/ql/src/test/results/clientpositive/join15.q.out +++ b/ql/src/test/results/clientpositive/join15.q.out @@ -14,7 +14,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -26,7 +26,7 @@ STAGE PLANS: Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) TableScan - alias: src1 + alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) diff --git a/ql/src/test/results/clientpositive/join17.q.out b/ql/src/test/results/clientpositive/join17.q.out index 
114e535..a17df5f 100644 --- a/ql/src/test/results/clientpositive/join17.q.out +++ b/ql/src/test/results/clientpositive/join17.q.out @@ -66,7 +66,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator @@ -78,11 +78,11 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: 1 + tag: 0 value expressions: value (type: string) auto parallelism: false TableScan - alias: src1 + alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator @@ -94,7 +94,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: 0 + tag: 1 value expressions: value (type: string) auto parallelism: false Path -> Alias: @@ -145,7 +145,7 @@ STAGE PLANS: name: default.src name: default.src Truncated Path -> Alias: - /src [src2, src1] + /src [src1, src2] Needs Tagging: true Reduce Operator Tree: Join Operator diff --git a/ql/src/test/results/clientpositive/join18.q.out b/ql/src/test/results/clientpositive/join18.q.out index 5550ff6..fbde0c8 100644 --- a/ql/src/test/results/clientpositive/join18.q.out +++ b/ql/src/test/results/clientpositive/join18.q.out @@ -35,40 +35,37 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: src2 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: key, value - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(DISTINCT value) - keys: key (type: string), value (type: string) + aggregations: count(value) + keys: key (type: string) mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ + key expressions: _col0 (type: string) + sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: count(DISTINCT KEY._col1:0._col0) + aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -78,14 +75,14 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) TableScan Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reduce Operator Tree: Join Operator @@ -95,14 +92,14 @@ STAGE PLANS: 0 {KEY.reducesinkkey0} {VALUE._col0} 1 {KEY.reducesinkkey0} {VALUE._col0} outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -112,41 +109,36 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: src1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: src2 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(value) - keys: key (type: string) + aggregations: count(DISTINCT value) + keys: key (type: string), value (type: string) mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By 
Operator - aggregations: count(VALUE._col0) + aggregations: count(DISTINCT KEY._col1:0._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/join18_multi_distinct.q.out b/ql/src/test/results/clientpositive/join18_multi_distinct.q.out index 5d4a738..ac2b0f1 100644 --- a/ql/src/test/results/clientpositive/join18_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/join18_multi_distinct.q.out @@ -41,40 +41,37 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: src2 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: key, value - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(DISTINCT value), count(DISTINCT key) - keys: key (type: string), value (type: string) + aggregations: count(value) + keys: key (type: string) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ + key expressions: _col0 (type: string) + sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0) + aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -84,15 +81,15 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint), _col2 (type: bigint) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) TableScan Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) Reduce Operator Tree: Join Operator condition map: @@ -101,14 +98,14 @@ STAGE PLANS: 0 {KEY.reducesinkkey0} {VALUE._col0} 1 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint), _col4 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -118,41 +115,36 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: src1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: src2 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(value) - keys: key (type: string) + aggregations: count(DISTINCT value), count(DISTINCT key) + keys: key (type: string), value (type: string) mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ Map-reduce partition columns: 
_col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) + aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/join19.q.out b/ql/src/test/results/clientpositive/join19.q.out index 373ae6e..d0c9d31 100644 --- a/ql/src/test/results/clientpositive/join19.q.out +++ b/ql/src/test/results/clientpositive/join19.q.out @@ -123,16 +123,31 @@ t6.predicate='http://sofa.semanticweb.org/sofa/v1.0/system#__LABEL_REL' ON (t66.subject=t55.object) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 Stage-3 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-3 - Stage-0 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan + alias: t1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: (((predicate = 'http://sofa.semanticweb.org/sofa/v1.0/system#__INSTANCEOF_REL') and (object = 'http://ontos/OntosMiner/Common.English/ontology#Citation')) and subject is not null) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: subject (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + TableScan alias: t2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator @@ -164,21 +179,6 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col0 (type: string) - TableScan - alias: t1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Filter Operator - predicate: (((predicate = 'http://sofa.semanticweb.org/sofa/v1.0/system#__INSTANCEOF_REL') and (object = 'http://ontos/OntosMiner/Common.English/ontology#Citation')) and subject is not null) (type: 
boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: subject (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -197,7 +197,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -208,36 +208,36 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col0 (type: string), _col2 (type: string) TableScan - alias: t5 + alias: t4 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: (((predicate = 'http://www.ontosearch.com/2007/12/ontosofa-ns#_to') and subject is not null) and object is not null) (type: boolean) + predicate: (((predicate = 'http://sofa.semanticweb.org/sofa/v1.0/system#__INSTANCEOF_REL') and (object = 'http://ontos/OntosMiner/Common.English/ontology#Author')) and subject is not null) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: subject (type: string), object (type: string) - outputColumnNames: _col0, _col1 + expressions: subject (type: string) + outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col1 (type: string) TableScan - alias: t4 + alias: t5 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: (((predicate = 'http://sofa.semanticweb.org/sofa/v1.0/system#__INSTANCEOF_REL') and (object = 'http://ontos/OntosMiner/Common.English/ontology#Author')) and subject is not null) (type: boolean) + predicate: (((predicate = 'http://www.ontosearch.com/2007/12/ontosofa-ns#_to') and subject is not null) and object is not null) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: subject (type: string) - outputColumnNames: _col0 + expressions: subject (type: string), object (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -256,10 +256,17 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan + Reduce Output Operator + key expressions: _col7 (type: string) + sort order: + + Map-reduce partition columns: _col7 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: 
string), _col2 (type: string), _col3 (type: string) + TableScan alias: t6 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator @@ -275,13 +282,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col1 (type: string) - TableScan - Reduce Output Operator - key expressions: _col7 (type: string) - sort order: + - Map-reduce partition columns: _col7 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col0 (type: string), _col2 (type: string), _col3 (type: string) Reduce Operator Tree: Join Operator condition map: diff --git a/ql/src/test/results/clientpositive/join2.q.out b/ql/src/test/results/clientpositive/join2.q.out index 1a60164..b56f4a1 100644 --- a/ql/src/test/results/clientpositive/join2.q.out +++ b/ql/src/test/results/clientpositive/join2.q.out @@ -29,7 +29,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -40,7 +40,7 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE TableScan - alias: src1 + alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) diff --git a/ql/src/test/results/clientpositive/join20.q.out b/ql/src/test/results/clientpositive/join20.q.out index 5ac2a99..78e8103 100644 --- a/ql/src/test/results/clientpositive/join20.q.out +++ b/ql/src/test/results/clientpositive/join20.q.out @@ -16,7 +16,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key < 10) (type: boolean) @@ -28,16 +28,7 @@ STAGE PLANS: Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) TableScan - alias: src3 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - TableScan - alias: src1 + alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key < 10) (type: boolean) @@ -48,6 +39,15 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -677,6 +677,18 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + 
predicate: ((key < 15) and (key < 10)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + TableScan alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -697,18 +709,6 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) - TableScan - alias: src1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key < 15) and (key < 10)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) Reduce Operator Tree: Join Operator condition map: diff --git a/ql/src/test/results/clientpositive/join21.q.out b/ql/src/test/results/clientpositive/join21.q.out index e35b107..c07f633 100644 --- a/ql/src/test/results/clientpositive/join21.q.out +++ b/ql/src/test/results/clientpositive/join21.q.out @@ -14,6 +14,15 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + TableScan alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -34,15 +43,6 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) - TableScan - alias: src1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) Reduce Operator Tree: Join Operator condition map: diff --git a/ql/src/test/results/clientpositive/join22.q.out b/ql/src/test/results/clientpositive/join22.q.out index 4c1a690..8cf051d 100644 --- a/ql/src/test/results/clientpositive/join22.q.out +++ b/ql/src/test/results/clientpositive/join22.q.out @@ -14,7 +14,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -24,8 +24,9 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) TableScan - alias: src1 + alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE 
Filter Operator predicate: key is not null (type: boolean) @@ -35,7 +36,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) Reduce Operator Tree: Join Operator condition map: diff --git a/ql/src/test/results/clientpositive/join23.q.out b/ql/src/test/results/clientpositive/join23.q.out index 0037d12..9f9787e 100644 --- a/ql/src/test/results/clientpositive/join23.q.out +++ b/ql/src/test/results/clientpositive/join23.q.out @@ -15,7 +15,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key < 10) (type: boolean) @@ -25,7 +25,7 @@ STAGE PLANS: Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: string), value (type: string) TableScan - alias: src1 + alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key < 10) (type: boolean) diff --git a/ql/src/test/results/clientpositive/join25.q.out b/ql/src/test/results/clientpositive/join25.q.out index 75ae979..6d4c603 100644 --- a/ql/src/test/results/clientpositive/join25.q.out +++ b/ql/src/test/results/clientpositive/join25.q.out @@ -23,13 +23,13 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-9 is a root stage Stage-1 depends on stages: Stage-9 - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 + Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 Stage-5 - Stage-6 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-3 depends on stages: Stage-0 + Stage-4 + Stage-6 + Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-9 @@ -89,10 +89,10 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-7 + Stage: Stage-8 Conditional Operator - Stage: Stage-4 + Stage: Stage-5 Move Operator files: hdfs directory: true @@ -108,10 +108,10 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 - Stage: Stage-2 + Stage: Stage-3 Stats-Aggr Operator - Stage: Stage-3 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -123,7 +123,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -135,7 +135,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 - Stage: Stage-6 + Stage: Stage-7 Move Operator files: hdfs directory: true diff --git a/ql/src/test/results/clientpositive/join26.q.out b/ql/src/test/results/clientpositive/join26.q.out index 90f4fbf..a49d945 100644 --- a/ql/src/test/results/clientpositive/join26.q.out +++ b/ql/src/test/results/clientpositive/join26.q.out @@ -104,13 +104,13 @@ TOK_QUERY STAGE DEPENDENCIES: Stage-10 is a root stage Stage-1 depends on stages: Stage-10 - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 + Stage-9 depends on stages: Stage-1 , consists of Stage-6, Stage-5, Stage-7 + Stage-6 + Stage-0 depends on stages: Stage-6, Stage-5, Stage-8 + Stage-4 depends on 
stages: Stage-0 Stage-5 - Stage-6 depends on stages: Stage-5 + Stage-7 + Stage-8 depends on stages: Stage-7 STAGE PLANS: Stage: Stage-10 @@ -272,10 +272,10 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [z] - Stage: Stage-7 + Stage: Stage-9 Conditional Operator - Stage: Stage-4 + Stage: Stage-6 Move Operator files: hdfs directory: true @@ -303,11 +303,11 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 - Stage: Stage-2 + Stage: Stage-4 Stats-Aggr Operator #### A masked pattern was here #### - Stage: Stage-3 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -376,7 +376,7 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### - Stage: Stage-5 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -445,7 +445,7 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### - Stage: Stage-6 + Stage: Stage-8 Move Operator files: hdfs directory: true diff --git a/ql/src/test/results/clientpositive/join27.q.out b/ql/src/test/results/clientpositive/join27.q.out index afa618a..369f5da 100644 --- a/ql/src/test/results/clientpositive/join27.q.out +++ b/ql/src/test/results/clientpositive/join27.q.out @@ -23,13 +23,13 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-9 is a root stage Stage-1 depends on stages: Stage-9 - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 + Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 Stage-5 - Stage-6 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-3 depends on stages: Stage-0 + Stage-4 + Stage-6 + Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-9 @@ -89,10 +89,10 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-7 + Stage: Stage-8 Conditional Operator - Stage: Stage-4 + Stage: Stage-5 Move Operator files: hdfs directory: true @@ -108,10 +108,10 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 - Stage: Stage-2 + Stage: Stage-3 Stats-Aggr Operator - Stage: Stage-3 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -123,7 +123,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -135,7 +135,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 - Stage: Stage-6 + Stage: Stage-7 Move Operator files: hdfs directory: true diff --git a/ql/src/test/results/clientpositive/join28.q.out b/ql/src/test/results/clientpositive/join28.q.out index e1cad88..2967ce4 100644 --- a/ql/src/test/results/clientpositive/join28.q.out +++ b/ql/src/test/results/clientpositive/join28.q.out @@ -96,33 +96,29 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} - 1 {value} - keys: - 0 _col0 (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col5 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 {value} + keys: + 0 _col0 
(type: string) + 1 key (type: string) + outputColumnNames: _col0, _col5 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col5 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col5 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 Local Work: Map Reduce Local Work diff --git a/ql/src/test/results/clientpositive/join29.q.out b/ql/src/test/results/clientpositive/join29.q.out index f33eb33..f4aab18 100644 --- a/ql/src/test/results/clientpositive/join29.q.out +++ b/ql/src/test/results/clientpositive/join29.q.out @@ -43,44 +43,36 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: y - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-7 Conditional Operator @@ -88,11 +80,11 @@ STAGE PLANS: Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: - $INTNAME + $INTNAME1 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $INTNAME + $INTNAME1 TableScan HashTable Sink Operator condition expressions: @@ -145,11 +137,11 @@ STAGE PLANS: Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: - $INTNAME1 + $INTNAME Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $INTNAME1 + $INTNAME TableScan HashTable Sink Operator condition expressions: @@ -194,14 +186,14 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) TableScan Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reduce Operator Tree: Join Operator @@ -211,14 +203,14 @@ STAGE PLANS: 0 {KEY.reducesinkkey0} {VALUE._col0} 1 {VALUE._col0} outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), UDFToInteger(_col3) (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -229,44 +221,36 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: x - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: y + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Reduce 
Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT subq1.key, subq1.cnt, subq2.cnt diff --git a/ql/src/test/results/clientpositive/join3.q.out b/ql/src/test/results/clientpositive/join3.q.out index ef0969b..bff1eda 100644 --- a/ql/src/test/results/clientpositive/join3.q.out +++ b/ql/src/test/results/clientpositive/join3.q.out @@ -28,7 +28,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -39,7 +39,7 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE TableScan - alias: src3 + alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -49,9 +49,8 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) TableScan - alias: src1 + alias: src3 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -61,6 +60,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Reduce Operator Tree: Join Operator condition map: diff --git a/ql/src/test/results/clientpositive/join30.q.out b/ql/src/test/results/clientpositive/join30.q.out index 90333a9..bb4f2a5 100644 
--- a/ql/src/test/results/clientpositive/join30.q.out +++ b/ql/src/test/results/clientpositive/join30.q.out @@ -22,7 +22,7 @@ STAGE DEPENDENCIES: Stage-4 is a root stage Stage-1 depends on stages: Stage-4 Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-4 @@ -67,22 +67,18 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -115,7 +111,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 - Stage: Stage-2 + Stage: Stage-3 Stats-Aggr Operator PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 diff --git a/ql/src/test/results/clientpositive/join31.q.out b/ql/src/test/results/clientpositive/join31.q.out index 00cae00..9d3b9c3 100644 --- a/ql/src/test/results/clientpositive/join31.q.out +++ b/ql/src/test/results/clientpositive/join31.q.out @@ -46,38 +46,34 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: y - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By 
Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -91,11 +87,11 @@ STAGE PLANS: Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: - $INTNAME + $INTNAME1 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $INTNAME + $INTNAME1 TableScan HashTable Sink Operator condition expressions: @@ -119,20 +115,17 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -182,11 +175,11 @@ STAGE PLANS: Stage: Stage-10 Map Reduce Local Work Alias -> Map Local Tables: - $INTNAME1 + $INTNAME Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $INTNAME1 + $INTNAME TableScan HashTable Sink Operator condition expressions: @@ -210,20 +203,17 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -235,13 +225,13 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE TableScan Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6 Data size: 45 Basic stats: 
COMPLETE Column stats: NONE + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -251,59 +241,51 @@ STAGE PLANS: 1 outputColumnNames: _col0 Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-5 Map Reduce Map Operator Tree: TableScan - alias: x - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: y + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: diff --git a/ql/src/test/results/clientpositive/join32.q.out 
b/ql/src/test/results/clientpositive/join32.q.out index 9a7a2a3..10ad2cf 100644 --- a/ql/src/test/results/clientpositive/join32.q.out +++ b/ql/src/test/results/clientpositive/join32.q.out @@ -100,13 +100,13 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-7 is a root stage - Stage-5 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-5 - Stage-2 depends on stages: Stage-0 + Stage-8 is a root stage + Stage-6 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-6 + Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-7 + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: x @@ -197,7 +197,7 @@ STAGE PLANS: 1 value (type: string) Position of Big Table: 0 - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -426,7 +426,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 - Stage: Stage-2 + Stage: Stage-3 Stats-Aggr Operator #### A masked pattern was here #### @@ -450,8 +450,8 @@ POSTHOOK: Input: default@src1 POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Output: default@dest_j1 -POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.val2 EXPRESSION [(src)y.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src1)x.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest_j1.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: select * from dest_j1 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/join32_lessSize.q.out b/ql/src/test/results/clientpositive/join32_lessSize.q.out index 5cfbbad..0eb9c5d 100644 --- a/ql/src/test/results/clientpositive/join32_lessSize.q.out +++ b/ql/src/test/results/clientpositive/join32_lessSize.q.out @@ -108,15 +108,15 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-8 is a root stage + Stage-9 is a root stage + Stage-7 depends on stages: Stage-9 + Stage-8 depends on stages: Stage-7 Stage-6 depends on stages: Stage-8 - Stage-7 depends on stages: Stage-6 - Stage-5 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-5 - Stage-2 depends on stages: Stage-0 + Stage-0 depends on stages: Stage-6 + Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-8 + Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: x @@ -141,7 +141,7 @@ STAGE PLANS: 1 key (type: string) Position of Big Table: 1 - Stage: Stage-6 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -277,7 +277,7 @@ STAGE PLANS: Truncated Path -> Alias: /src [y] - Stage: Stage-7 + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: z @@ -348,7 +348,7 @@ STAGE PLANS: 1 value (type: string) Position of Big Table: 0 - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -491,7 +491,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 - Stage: Stage-2 + Stage: Stage-3 Stats-Aggr Operator #### A masked pattern was here #### @@ -515,8 +515,8 @@ POSTHOOK: Input: default@src1 POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Output: default@dest_j1 -POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.val2 EXPRESSION 
[(src)y.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src1)x.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest_j1.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: select * from dest_j1 PREHOOK: type: QUERY @@ -1560,35 +1560,35 @@ STAGE PLANS: tag: 0 auto parallelism: false TableScan - alias: z - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: y + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - tag: 2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: 1 value expressions: value (type: string) auto parallelism: false TableScan - alias: y - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: z + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: 1 + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + tag: 2 value expressions: value (type: string) auto parallelism: false Path -> Alias: @@ -2029,27 +2029,23 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Position of Big Table: 1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types string,string - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types string,string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -2384,8 +2380,8 @@ POSTHOOK: Input: default@src1 POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Output: default@dest_j2 -POSTHOOK: Lineage: dest_j2.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j2.val2 SIMPLE [(src1)x.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j2.key EXPRESSION [(src1)x.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j2.val2 EXPRESSION [(src1)x.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest_j2.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: select * from dest_j2 PREHOOK: type: QUERY @@ -2637,27 +2633,23 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Position of Big Table: 0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types string,string - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types string,string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -3002,8 +2994,8 @@ POSTHOOK: Input: default@src1 POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Output: default@dest_j2 -POSTHOOK: Lineage: dest_j2.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j2.val2 SIMPLE [(src1)x.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j2.key EXPRESSION [(src1)x.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j2.val2 EXPRESSION [(src1)x.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest_j2.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: select * from dest_j2 PREHOOK: type: QUERY @@ -3173,16 +3165,12 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE 
Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -3271,8 +3259,8 @@ POSTHOOK: Input: default@src1 POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Output: default@dest_j2 -POSTHOOK: Lineage: dest_j2.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j2.val2 SIMPLE [(src1)x.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j2.key EXPRESSION [(src1)x.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j2.val2 EXPRESSION [(src1)x.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest_j2.value SIMPLE [(srcpart)x.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: select * from dest_j2 PREHOOK: type: QUERY @@ -3430,16 +3418,12 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -3528,8 +3512,8 @@ POSTHOOK: Input: default@src1 POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Output: default@dest_j2 -POSTHOOK: Lineage: dest_j2.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j2.val2 SIMPLE [(src1)x.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j2.key EXPRESSION [(src1)x.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j2.val2 EXPRESSION [(src1)x.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest_j2.value SIMPLE [(srcpart)y.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: select * from dest_j2 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/join33.q.out b/ql/src/test/results/clientpositive/join33.q.out index 9a7a2a3..10ad2cf 100644 --- a/ql/src/test/results/clientpositive/join33.q.out +++ b/ql/src/test/results/clientpositive/join33.q.out @@ -100,13 +100,13 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-7 is a root stage - Stage-5 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-5 - Stage-2 depends on stages: Stage-0 + Stage-8 is a root stage + Stage-6 depends on stages: Stage-8 + Stage-0 depends on stages: 
Stage-6 + Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-7 + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: x @@ -197,7 +197,7 @@ STAGE PLANS: 1 value (type: string) Position of Big Table: 0 - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -426,7 +426,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 - Stage: Stage-2 + Stage: Stage-3 Stats-Aggr Operator #### A masked pattern was here #### @@ -450,8 +450,8 @@ POSTHOOK: Input: default@src1 POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Output: default@dest_j1 -POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.val2 EXPRESSION [(src)y.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src1)x.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest_j1.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: select * from dest_j1 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/join35.q.out b/ql/src/test/results/clientpositive/join35.q.out index 70fcf8f..fd05212 100644 --- a/ql/src/test/results/clientpositive/join35.q.out +++ b/ql/src/test/results/clientpositive/join35.q.out @@ -166,24 +166,20 @@ STAGE PLANS: isSamplingPred: false predicate: (key < 20) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Group By Operator + aggregations: count(1) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col1 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -241,27 +237,23 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types string,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types string,bigint + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Map Reduce Local Work @@ -512,24 +504,20 @@ STAGE PLANS: isSamplingPred: false predicate: (key > 100) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Group By Operator + aggregations: count(1) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col1 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -587,27 +575,23 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types string,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types string,bigint + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT x.key, x.value, subq1.cnt diff --git 
a/ql/src/test/results/clientpositive/join36.q.out b/ql/src/test/results/clientpositive/join36.q.out index 03b1377..a56826b 100644 --- a/ql/src/test/results/clientpositive/join36.q.out +++ b/ql/src/test/results/clientpositive/join36.q.out @@ -63,13 +63,13 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-9 is a root stage Stage-1 depends on stages: Stage-9 - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 + Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 Stage-5 - Stage-6 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-3 depends on stages: Stage-0 + Stage-4 + Stage-6 + Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-9 @@ -129,10 +129,10 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-7 + Stage: Stage-8 Conditional Operator - Stage: Stage-4 + Stage: Stage-5 Move Operator files: hdfs directory: true @@ -148,10 +148,10 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 - Stage: Stage-2 + Stage: Stage-3 Stats-Aggr Operator - Stage: Stage-3 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -163,7 +163,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -175,7 +175,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 - Stage: Stage-6 + Stage: Stage-7 Move Operator files: hdfs directory: true diff --git a/ql/src/test/results/clientpositive/join37.q.out b/ql/src/test/results/clientpositive/join37.q.out index afacf60..b393615 100644 --- a/ql/src/test/results/clientpositive/join37.q.out +++ b/ql/src/test/results/clientpositive/join37.q.out @@ -23,13 +23,13 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-9 is a root stage Stage-1 depends on stages: Stage-9 - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 + Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 Stage-5 - Stage-6 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-3 depends on stages: Stage-0 + Stage-4 + Stage-6 + Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-9 @@ -89,10 +89,10 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-7 + Stage: Stage-8 Conditional Operator - Stage: Stage-4 + Stage: Stage-5 Move Operator files: hdfs directory: true @@ -108,10 +108,10 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 - Stage: Stage-2 + Stage: Stage-3 Stats-Aggr Operator - Stage: Stage-3 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -123,7 +123,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -135,7 +135,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 - Stage: Stage-6 + Stage: Stage-7 Move Operator files: hdfs directory: true diff --git a/ql/src/test/results/clientpositive/join38.q.out b/ql/src/test/results/clientpositive/join38.q.out index d3acf11..644c906 100644 --- a/ql/src/test/results/clientpositive/join38.q.out +++ 
b/ql/src/test/results/clientpositive/join38.q.out @@ -96,22 +96,18 @@ STAGE PLANS: 1 '111' (type: string) outputColumnNames: _col1, _col10 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col10 (type: string) - outputColumnNames: _col1, _col10 + Group By Operator + aggregations: count(1) + keys: _col1 (type: string), _col10 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col1 (type: string), _col10 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: bigint) + value expressions: _col2 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/join39.q.out b/ql/src/test/results/clientpositive/join39.q.out index e1bc8b3..1ce7df2 100644 --- a/ql/src/test/results/clientpositive/join39.q.out +++ b/ql/src/test/results/clientpositive/join39.q.out @@ -23,13 +23,13 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-9 is a root stage Stage-1 depends on stages: Stage-9 - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 + Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 Stage-5 - Stage-6 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-3 depends on stages: Stage-0 + Stage-4 + Stage-6 + Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-9 @@ -90,10 +90,10 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-7 + Stage: Stage-8 Conditional Operator - Stage: Stage-4 + Stage: Stage-5 Move Operator files: hdfs directory: true @@ -109,10 +109,10 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 - Stage: Stage-2 + Stage: Stage-3 Stats-Aggr Operator - Stage: Stage-3 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -124,7 +124,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -136,7 +136,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 - Stage: Stage-6 + Stage: Stage-7 Move Operator files: hdfs directory: true diff --git a/ql/src/test/results/clientpositive/join40.q.out b/ql/src/test/results/clientpositive/join40.q.out index 024dd3b..7a13fca 100644 --- a/ql/src/test/results/clientpositive/join40.q.out +++ b/ql/src/test/results/clientpositive/join40.q.out @@ -17,15 +17,6 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: x - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: 
+ - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -41,6 +32,15 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) + TableScan + alias: x + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -659,7 +659,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -669,9 +669,8 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) TableScan - alias: src1 + alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -681,6 +680,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1764,7 +1764,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key < 10) (type: boolean) @@ -1776,16 +1776,7 @@ STAGE PLANS: Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) TableScan - alias: src3 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - TableScan - alias: src1 + alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key < 10) (type: boolean) @@ -1796,6 +1787,15 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -2425,6 +2425,18 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic 
stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 15) and (key < 10)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + TableScan alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -2445,18 +2457,6 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) - TableScan - alias: src1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key < 15) and (key < 10)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -3737,7 +3737,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -3748,7 +3748,7 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -3766,19 +3766,17 @@ STAGE PLANS: 0 1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce diff --git a/ql/src/test/results/clientpositive/join41.q.out b/ql/src/test/results/clientpositive/join41.q.out index b464180..0690ec1 100644 --- a/ql/src/test/results/clientpositive/join41.q.out +++ b/ql/src/test/results/clientpositive/join41.q.out @@ -23,6 +23,15 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan + alias: src1 + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition 
columns: key (type: string) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + TableScan alias: src2 Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -34,15 +43,6 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) - TableScan - alias: src1 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -98,6 +98,15 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan + alias: src1 + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + TableScan alias: src2 Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -109,15 +118,6 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) - TableScan - alias: src1 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) Reduce Operator Tree: Join Operator condition map: diff --git a/ql/src/test/results/clientpositive/join9.q.out b/ql/src/test/results/clientpositive/join9.q.out index adbeb61..0258ac0 100644 --- a/ql/src/test/results/clientpositive/join9.q.out +++ b/ql/src/test/results/clientpositive/join9.q.out @@ -82,7 +82,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator @@ -94,11 +94,10 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string) + tag: 0 auto parallelism: false TableScan - alias: src1 + alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator @@ -110,7 +109,8 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: 0 + tag: 1 + value expressions: value (type: string) auto parallelism: false Path -> Alias: #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/join_alt_syntax.q.out b/ql/src/test/results/clientpositive/join_alt_syntax.q.out index 097c5a9..cc1efe6 100644 --- a/ql/src/test/results/clientpositive/join_alt_syntax.q.out +++ b/ql/src/test/results/clientpositive/join_alt_syntax.q.out @@ -14,14 +14,14 @@ STAGE PLANS: Map Reduce 
Map Operator Tree: TableScan - alias: p2 + alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: p_name (type: string) TableScan - alias: p1 + alias: p2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: @@ -71,7 +71,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: p3 + alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_name is not null (type: boolean) @@ -93,7 +93,7 @@ STAGE PLANS: Map-reduce partition columns: p_name (type: string) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE TableScan - alias: p1 + alias: p3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_name is not null (type: boolean) @@ -167,7 +167,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE TableScan - alias: p3 + alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_name is not null (type: boolean) @@ -178,7 +178,7 @@ STAGE PLANS: Map-reduce partition columns: p_name (type: string) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE TableScan - alias: p1 + alias: p3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_name is not null (type: boolean) @@ -220,7 +220,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[5][tables = [p1, p2]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[5][tables = [p1, p2]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain select p1.p_name, p2.p_name, p3.p_name from part p1 , part p2 , part p3 where p2.p_partkey + p1.p_partkey = p1.p_partkey and p3.p_name = p2.p_name @@ -230,15 +230,22 @@ from part p1 , part p2 , part p3 where p2.p_partkey + p1.p_partkey = p1.p_partkey and p3.p_name = p2.p_name POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_name (type: string) + TableScan alias: p2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -248,13 +255,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE value expressions: p_partkey (type: int), p_name (type: string) - TableScan - alias: p1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: p_partkey (type: int), p_name (type: string) Reduce Operator Tree: Join Operator condition map: @@ -274,10 
+274,17 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan + Reduce Output Operator + key expressions: _col13 (type: string) + sort order: + + Map-reduce partition columns: _col13 (type: string) + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col12 (type: int) + TableScan alias: p3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -288,13 +295,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: p_name (type: string) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - TableScan - Reduce Output Operator - key expressions: _col13 (type: string) - sort order: + - Map-reduce partition columns: _col13 (type: string) - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col12 (type: int) Reduce Operator Tree: Join Operator condition map: @@ -336,17 +336,17 @@ where p2.p_name = p3.p_name and p1.p_partkey = p4.p_partkey and p1.p_partkey = p2.p_partkey POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-2 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - alias: p2 + alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (p_name is not null and p_partkey is not null) (type: boolean) @@ -357,7 +357,7 @@ STAGE PLANS: Map-reduce partition columns: p_name (type: string), p_partkey (type: int) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE TableScan - alias: p1 + alias: p2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (p_name is not null and p_partkey is not null) (type: boolean) @@ -387,6 +387,13 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan + Reduce Output Operator + key expressions: _col13 (type: string) + sort order: + + Map-reduce partition columns: _col13 (type: string) + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col12 (type: int) + TableScan alias: p3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -397,13 +404,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: p_name (type: string) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - TableScan - Reduce Output Operator - key expressions: _col13 (type: string) - sort order: + - Map-reduce partition columns: _col13 (type: string) - Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col12 (type: int) Reduce Operator Tree: Join Operator condition map: @@ -420,10 +420,17 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-1 + Stage: Stage-3 Map 
Reduce Map Operator Tree: TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col12 (type: int), _col13 (type: string), _col25 (type: string) + TableScan alias: p4 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -435,13 +442,6 @@ STAGE PLANS: Map-reduce partition columns: p_partkey (type: int) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE value expressions: p_name (type: string) - TableScan - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col12 (type: int), _col13 (type: string), _col25 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -483,17 +483,17 @@ where p2.p_name = p3.p_name and p1.p_partkey = p4.p_partkey and p1.p_partkey = p2.p_partkey POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-2 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - alias: p2 + alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (p_name is not null and p_partkey is not null) (type: boolean) @@ -504,7 +504,7 @@ STAGE PLANS: Map-reduce partition columns: p_name (type: string), p_partkey (type: int) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE TableScan - alias: p1 + alias: p2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (p_name is not null and p_partkey is not null) (type: boolean) @@ -534,6 +534,13 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan + Reduce Output Operator + key expressions: _col13 (type: string) + sort order: + + Map-reduce partition columns: _col13 (type: string) + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col12 (type: int) + TableScan alias: p3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -544,13 +551,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: p_name (type: string) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - TableScan - Reduce Output Operator - key expressions: _col13 (type: string) - sort order: + - Map-reduce partition columns: _col13 (type: string) - Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col12 (type: int) Reduce Operator Tree: Join Operator condition map: @@ -567,10 +567,17 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce 
partition columns: _col0 (type: int) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col12 (type: int), _col13 (type: string), _col25 (type: string) + TableScan alias: p4 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -582,13 +589,6 @@ STAGE PLANS: Map-reduce partition columns: p_partkey (type: int) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE value expressions: p_name (type: string) - TableScan - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col12 (type: int), _col13 (type: string), _col25 (type: string) Reduce Operator Tree: Join Operator condition map: diff --git a/ql/src/test/results/clientpositive/join_cond_pushdown_1.q.out b/ql/src/test/results/clientpositive/join_cond_pushdown_1.q.out index 5e652fb..a0b5958 100644 --- a/ql/src/test/results/clientpositive/join_cond_pushdown_1.q.out +++ b/ql/src/test/results/clientpositive/join_cond_pushdown_1.q.out @@ -13,7 +13,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: p3 + alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_name is not null (type: boolean) @@ -37,7 +37,7 @@ STAGE PLANS: Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) TableScan - alias: p1 + alias: p3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_name is not null (type: boolean) @@ -92,7 +92,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: p3 + alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_name is not null (type: boolean) @@ -116,7 +116,7 @@ STAGE PLANS: Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) TableScan - alias: p1 + alias: p3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_name is not null (type: boolean) @@ -156,7 +156,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[5][tables = [p1, p2]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[5][tables = [p1, p2]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain select * from part p1 join part p2 join part p3 on p2.p_partkey + p1.p_partkey = p1.p_partkey and p3.p_name = p2.p_name PREHOOK: type: QUERY @@ -164,15 +164,22 @@ POSTHOOK: query: explain select * from part p1 join part p2 join part p3 on p2.p_partkey + p1.p_partkey = p1.p_partkey and p3.p_name = p2.p_name POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: 
Stage-2 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + TableScan alias: p2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -182,13 +189,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - TableScan - alias: p1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) Reduce Operator Tree: Join Operator condition map: @@ -208,10 +208,17 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan + Reduce Output Operator + key expressions: _col13 (type: string) + sort order: + + Map-reduce partition columns: _col13 (type: string) + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) + TableScan alias: p3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -223,13 +230,6 @@ STAGE PLANS: Map-reduce partition columns: p_name (type: string) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - TableScan - Reduce Output Operator - key expressions: _col13 (type: string) - sort order: + - Map-reduce partition columns: _col13 (type: string) - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) Reduce Operator 
Tree: Join Operator condition map: @@ -257,7 +257,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[5][tables = [p1, p2]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[5][tables = [p1, p2]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain select * from part p1 join part p2 join part p3 on p2.p_partkey = 1 and p3.p_name = p2.p_name PREHOOK: type: QUERY @@ -265,15 +265,22 @@ POSTHOOK: query: explain select * from part p1 join part p2 join part p3 on p2.p_partkey = 1 and p3.p_name = p2.p_name POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + TableScan alias: p2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -283,13 +290,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE value expressions: p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - TableScan - alias: p1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) Reduce Operator Tree: Join Operator condition map: @@ -306,10 +306,17 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan + Reduce Output Operator + key expressions: _col13 (type: string) + sort order: + + Map-reduce partition columns: _col13 (type: string) + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) + TableScan alias: p3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -321,13 +328,6 @@ STAGE PLANS: Map-reduce partition columns: p_name (type: string) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type 
(type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - TableScan - Reduce Output Operator - key expressions: _col13 (type: string) - sort order: + - Map-reduce partition columns: _col13 (type: string) - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) Reduce Operator Tree: Join Operator condition map: diff --git a/ql/src/test/results/clientpositive/join_cond_pushdown_2.q.out b/ql/src/test/results/clientpositive/join_cond_pushdown_2.q.out index 973cfec..34e97d4 100644 --- a/ql/src/test/results/clientpositive/join_cond_pushdown_2.q.out +++ b/ql/src/test/results/clientpositive/join_cond_pushdown_2.q.out @@ -13,7 +13,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: p4 + alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_name is not null (type: boolean) @@ -25,7 +25,7 @@ STAGE PLANS: Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) TableScan - alias: p3 + alias: p2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_name is not null (type: boolean) @@ -37,7 +37,7 @@ STAGE PLANS: Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) TableScan - alias: p2 + alias: p3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_name is not null (type: boolean) @@ -49,7 +49,7 @@ STAGE PLANS: Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) TableScan - alias: p1 + alias: p4 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_name is not null (type: boolean) @@ -100,17 +100,17 @@ from part p1 join part p2 join part p3 on p2.p_name = p1.p_name join part p4 on and p1.p_partkey = p2.p_partkey POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-2 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - alias: p2 + alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (p_name is not null and p_partkey is not null) 
(type: boolean) @@ -122,7 +122,7 @@ STAGE PLANS: Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE value expressions: p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) TableScan - alias: p1 + alias: p2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (p_name is not null and p_partkey is not null) (type: boolean) @@ -153,6 +153,13 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan + Reduce Output Operator + key expressions: _col13 (type: string) + sort order: + + Map-reduce partition columns: _col13 (type: string) + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) + TableScan alias: p3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -164,13 +171,6 @@ STAGE PLANS: Map-reduce partition columns: p_name (type: string) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - TableScan - Reduce Output Operator - key expressions: _col13 (type: string) - sort order: + - Map-reduce partition columns: _col13 (type: string) - Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -187,10 +187,17 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) + TableScan alias: p4 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter 
Operator
@@ -202,13 +209,6 @@ STAGE PLANS:
                 Map-reduce partition columns: p_partkey (type: int)
                 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
                 value expressions: p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
-          TableScan
-            Reduce Output Operator
-              key expressions: _col0 (type: int)
-              sort order: +
-              Map-reduce partition columns: _col0 (type: int)
-              Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string)
       Reduce Operator Tree:
         Join Operator
           condition map:
diff --git a/ql/src/test/results/clientpositive/join_cond_pushdown_3.q.out b/ql/src/test/results/clientpositive/join_cond_pushdown_3.q.out
index 5513e42..1ae7edf 100644
--- a/ql/src/test/results/clientpositive/join_cond_pushdown_3.q.out
+++ b/ql/src/test/results/clientpositive/join_cond_pushdown_3.q.out
@@ -15,7 +15,7 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
           TableScan
-            alias: p3
+            alias: p1
             Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: p_name is not null (type: boolean)
@@ -39,7 +39,7 @@ STAGE PLANS:
               Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
               value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
           TableScan
-            alias: p1
+            alias: p3
             Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: p_name is not null (type: boolean)
@@ -99,7 +99,7 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
           TableScan
-            alias: p3
+            alias: p1
             Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: p_name is not null (type: boolean)
@@ -123,7 +123,7 @@ STAGE PLANS:
               Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
               value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
           TableScan
-            alias: p1
+            alias: p3
             Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: p_name is not null (type: boolean)
@@ -166,7 +166,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-Warning: Shuffle Join JOIN[5][tables = [p1, p2]] in Stage 'Stage-2:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[5][tables = [p1, p2]] in Stage 'Stage-1:MAPRED' is a cross product
 PREHOOK: query: explain select *
 from part p1 join part p2 join part p3 where p2.p_partkey + p1.p_partkey =
 p1.p_partkey and p3.p_name = p2.p_name
@@ -176,15 +176,22 @@ from part p1 join part p2 join part p3 where p2.p_partkey + p1.p_partkey =
 p1.p_partkey and p3.p_name = p2.p_name
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-2 is a root stage
-  Stage-1 depends on stages: Stage-2
-  Stage-0 depends on stages: Stage-1
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
 
 STAGE PLANS:
-  Stage: Stage-2
+  Stage: Stage-1
     Map Reduce
       Map Operator Tree:
           TableScan
+            alias: p1
+            Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+              value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
+          TableScan
             alias: p2
             Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
@@ -194,13 +201,6 @@ STAGE PLANS:
               sort order: 
               Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
               value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
-          TableScan
-            alias: p1
-            Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
-            Reduce Output Operator
-              sort order: 
-              Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
-              value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
       Reduce Operator Tree:
         Join Operator
           condition map:
@@ -220,10 +220,17 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
-  Stage: Stage-1
+  Stage: Stage-2
     Map Reduce
       Map Operator Tree:
           TableScan
+            Reduce Output Operator
+              key expressions: _col13 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col13 (type: string)
+              Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string)
+          TableScan
             alias: p3
             Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
@@ -235,13 +242,6 @@ STAGE PLANS:
               Map-reduce partition columns: p_name (type: string)
               Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
               value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
-          TableScan
-            Reduce Output Operator
-              key expressions: _col13 (type: string)
-              sort order: +
-              Map-reduce partition columns: _col13 (type: string)
-              Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string)
       Reduce Operator Tree:
         Join Operator
           condition map:
@@ -272,7 +272,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-Warning: Shuffle Join JOIN[5][tables = [p1, p2]] in Stage 'Stage-2:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[5][tables = [p1, p2]] in Stage 'Stage-1:MAPRED' is a cross product
 PREHOOK: query: explain select *
 from part p1 join part p2 join part p3 where p2.p_partkey = 1
 and p3.p_name = p2.p_name
 PREHOOK: type: QUERY
 POSTHOOK: query: explain select *
@@ -282,15 +282,22 @@ from part p1 join part p2 join part p3 where p2.p_partkey = 1
 and p3.p_name = p2.p_name
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-2 is a root stage
-  Stage-1 depends on stages: Stage-2
-  Stage-0 depends on stages: Stage-1
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
 
 STAGE PLANS:
-  Stage: Stage-2
+  Stage: Stage-1
     Map Reduce
       Map Operator Tree:
           TableScan
+            alias: p1
+            Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+              value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
+          TableScan
             alias: p2
             Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
@@ -300,13 +307,6 @@ STAGE PLANS:
               sort order: 
               Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE
               value expressions: p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
-          TableScan
-            alias: p1
-            Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
-            Reduce Output Operator
-              sort order: 
-              Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
-              value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
       Reduce Operator Tree:
         Join Operator
           condition map:
@@ -323,10 +323,17 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
-  Stage: Stage-1
+  Stage: Stage-2
     Map Reduce
      Map Operator Tree:
           TableScan
+            Reduce Output Operator
+              key expressions: _col13 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col13 (type: string)
+              Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string)
+          TableScan
             alias: p3
             Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
@@ -338,13 +345,6 @@ STAGE PLANS:
               Map-reduce partition columns: p_name (type: string)
               Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
               value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
-          TableScan
-            Reduce Output Operator
-              key expressions: _col13 (type: string)
-              sort order: +
-              Map-reduce partition columns: _col13 (type: string)
-              Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string)
       Reduce Operator Tree:
         Join Operator
           condition map:
diff --git a/ql/src/test/results/clientpositive/join_cond_pushdown_4.q.out b/ql/src/test/results/clientpositive/join_cond_pushdown_4.q.out
index d14a940..2b2d654 100644
--- a/ql/src/test/results/clientpositive/join_cond_pushdown_4.q.out
+++ b/ql/src/test/results/clientpositive/join_cond_pushdown_4.q.out
@@ -15,7 +15,7 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
           TableScan
-            alias: p4
+            alias: p1
             Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: p_name is not null (type: boolean)
@@ -27,7 +27,7 @@ STAGE PLANS:
               Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
               value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
           TableScan
-            alias: p3
+            alias: p2
             Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: p_name is not null (type: boolean)
@@ -39,7 +39,7 @@ STAGE PLANS:
               Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
               value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
           TableScan
-            alias: p2
+            alias: p3
             Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: p_name is not null (type: boolean)
@@ -51,7 +51,7 @@ STAGE PLANS:
               Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
               value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
           TableScan
-            alias: p1
+            alias: p4
             Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: p_name is not null (type: boolean)
@@ -107,17 +107,17 @@ where p2.p_name = p3.p_name and p1.p_partkey = p4.p_partkey
 and p1.p_partkey = p2.p_partkey
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-3 is a root stage
-  Stage-2 depends on stages: Stage-3
-  Stage-1 depends on stages: Stage-2
-  Stage-0 depends on stages: Stage-1
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-3 depends on stages: Stage-2
+  Stage-0 depends on stages: Stage-3
 
 STAGE PLANS:
-  Stage: Stage-3
+  Stage: Stage-1
     Map Reduce
       Map Operator Tree:
           TableScan
-            alias: p2
+            alias: p1
             Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: (p_name is not null and p_partkey is not null) (type: boolean)
@@ -129,7 +129,7 @@ STAGE PLANS:
               Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE
               value expressions: p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
           TableScan
-            alias: p1
+            alias: p2
             Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: (p_name is not null and p_partkey is not null) (type: boolean)
@@ -160,6 +160,13 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
           TableScan
+            Reduce Output Operator
+              key expressions: _col13 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col13 (type: string)
+              Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string)
+          TableScan
             alias: p3
             Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
@@ -171,13 +178,6 @@ STAGE PLANS:
               Map-reduce partition columns: p_name (type: string)
               Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
               value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
-          TableScan
-            Reduce Output Operator
-              key expressions: _col13 (type: string)
-              sort order: +
-              Map-reduce partition columns: _col13 (type: string)
-              Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string)
       Reduce Operator Tree:
         Join Operator
           condition map:
@@ -194,10 +194,17 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
-  Stage: Stage-1
+  Stage: Stage-3
     Map Reduce
       Map Operator Tree:
           TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: int)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: int)
+              Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string)
+          TableScan
             alias: p4
             Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
@@ -209,13 +216,6 @@ STAGE PLANS:
               Map-reduce partition columns: p_partkey (type: int)
               Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
               value expressions: p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
-          TableScan
-            Reduce Output Operator
-              key expressions: _col0 (type: int)
-              sort order: +
-              Map-reduce partition columns: _col0 (type: int)
-              Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string)
       Reduce Operator Tree:
         Join Operator
           condition map:
diff --git a/ql/src/test/results/clientpositive/join_cond_pushdown_unqual1.q.out b/ql/src/test/results/clientpositive/join_cond_pushdown_unqual1.q.out
index 0b376c0..629c2d4 100644
--- a/ql/src/test/results/clientpositive/join_cond_pushdown_unqual1.q.out
+++ b/ql/src/test/results/clientpositive/join_cond_pushdown_unqual1.q.out
@@ -69,17 +69,17 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
           TableScan
-            alias: p3
-            Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+            alias: p1
+            Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: p3_name is not null (type: boolean)
-              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+              predicate: p_name is not null (type: boolean)
+              Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
-                key expressions: p3_name (type: string)
+                key expressions: p_name (type: string)
                 sort order: +
-                Map-reduce partition columns: p3_name (type: string)
-                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                value expressions: p3_partkey (type: int), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string)
+                Map-reduce partition columns: p_name (type: string)
+                Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+                value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
           TableScan
             alias: p2
             Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
@@ -93,17 +93,17 @@ STAGE PLANS:
                 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                 value expressions: p2_partkey (type: int), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string)
           TableScan
-            alias: p1
-            Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+            alias: p3
+            Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
             Filter Operator
-              predicate: p_name is not null (type: boolean)
-              Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+              predicate: p3_name is not null (type: boolean)
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
              Reduce Output Operator
-                key expressions: p_name (type: string)
+                key expressions: p3_name (type: string)
                 sort order: +
-                Map-reduce partition columns: p_name (type: string)
-                Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
-                value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
+                Map-reduce partition columns: p3_name (type: string)
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                value expressions: p3_partkey (type: int), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string)
       Reduce Operator Tree:
         Join Operator
           condition map:
@@ -148,17 +148,17 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
           TableScan
-            alias: p3
-            Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+            alias: p1
+            Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: p3_name is not null (type: boolean)
-              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+              predicate: p_name is not null (type: boolean)
+              Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
              Reduce Output Operator
-                key expressions: p3_name (type: string)
+                key expressions: p_name (type: string)
                 sort order: +
-                Map-reduce partition columns: p3_name (type: string)
-                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                value expressions: p3_partkey (type: int), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string)
+                Map-reduce partition columns: p_name (type: string)
+                Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+                value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
           TableScan
             alias: p2
             Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
@@ -172,17 +172,17 @@ STAGE PLANS:
                 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                 value expressions: p2_partkey (type: int), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string)
           TableScan
-            alias: p1
-            Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+            alias: p3
+            Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
             Filter Operator
-              predicate: p_name is not null (type: boolean)
-              Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+              predicate: p3_name is not null (type: boolean)
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
              Reduce Output Operator
-                key expressions: p_name (type: string)
+                key expressions: p3_name (type: string)
                 sort order: +
-                Map-reduce partition columns: p_name (type: string)
-                Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
-                value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
+                Map-reduce partition columns: p3_name (type: string)
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                value expressions: p3_partkey (type: int), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string)
       Reduce Operator Tree:
         Join Operator
           condition map:
@@ -212,7 +212,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-Warning: Shuffle Join JOIN[5][tables = [p1, p2]] in Stage 'Stage-2:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[5][tables = [p1, p2]] in Stage 'Stage-1:MAPRED' is a cross product
 PREHOOK: query: explain select * from part p1 join part2 p2 join part3 p3 on p2_partkey + p_partkey = p1.p_partkey
 and p3_name = p2_name
 PREHOOK: type: QUERY
@@ -220,15 +220,22 @@ POSTHOOK: query: explain select * from part p1 join part2 p2 join part3 p3 on p2_partkey + p_partkey = p1.p_partkey
 and p3_name = p2_name
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-2 is a root stage
-  Stage-1 depends on stages: Stage-2
-  Stage-0 depends on stages: Stage-1
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
 
 STAGE PLANS:
-  Stage: Stage-2
+  Stage: Stage-1
     Map Reduce
       Map Operator Tree:
          TableScan
+            alias: p1
+            Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+              value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
+          TableScan
             alias: p2
             Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
             Filter Operator
@@ -238,13 +245,6 @@ STAGE PLANS:
               sort order: 
               Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
               value expressions: p2_partkey (type: int), p2_name (type: string), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string)
-          TableScan
-            alias: p1
-            Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
-            Reduce Output Operator
-              sort order: 
-              Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
-              value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
       Reduce Operator Tree:
         Join Operator
           condition map:
@@ -264,10 +264,17 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
-  Stage: Stage-1
+  Stage: Stage-2
     Map Reduce
       Map Operator Tree:
          TableScan
+            Reduce Output Operator
+              key expressions: _col13 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col13 (type: string)
+              Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string)
+          TableScan
             alias: p3
             Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
             Filter Operator
@@ -279,13 +286,6 @@ STAGE PLANS:
               Map-reduce partition columns: p3_name (type: string)
               Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
               value expressions: p3_partkey (type: int), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string)
-          TableScan
-            Reduce Output Operator
-              key expressions: _col13 (type: string)
-              sort order: +
-              Map-reduce partition columns: _col13 (type: string)
-              Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string)
       Reduce Operator Tree:
         Join Operator
           condition map:
@@ -313,7 +313,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-Warning: Shuffle Join JOIN[5][tables = [p1, p2]] in Stage 'Stage-2:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[5][tables = [p1, p2]] in Stage 'Stage-1:MAPRED' is a cross product
 PREHOOK: query: explain select * from part p1 join part2 p2 join part3 p3 on p2_partkey = 1
 and p3_name = p2_name
 PREHOOK: type: QUERY
@@ -321,15 +321,22 @@ POSTHOOK: query: explain select * from part p1 join part2 p2 join part3 p3 on p2_partkey = 1
 and p3_name = p2_name
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-2 is a root stage
-  Stage-1 depends on stages: Stage-2
-  Stage-0 depends on stages: Stage-1
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
 
 STAGE PLANS:
-  Stage: Stage-2
+  Stage: Stage-1
     Map Reduce
       Map Operator Tree:
          TableScan
+            alias: p1
+            Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+              value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
+          TableScan
             alias: p2
             Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
             Filter Operator
@@ -339,13 +346,6 @@ STAGE PLANS:
               sort order: 
               Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
               value expressions: p2_name (type: string), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string)
-          TableScan
-            alias: p1
-            Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
-            Reduce Output Operator
-              sort order: 
-              Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
-              value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
       Reduce Operator Tree:
         Join Operator
           condition map:
@@ -362,10 +362,17 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
-  Stage: Stage-1
+  Stage: Stage-2
     Map Reduce
       Map Operator Tree:
          TableScan
+            Reduce Output Operator
+              key expressions: _col13 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col13 (type: string)
+              Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string)
+          TableScan
             alias: p3
             Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
             Filter Operator
@@ -377,13 +384,6 @@ STAGE PLANS:
               Map-reduce partition columns: p3_name (type: string)
               Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
               value expressions: p3_partkey (type: int), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string)
-          TableScan
-            Reduce Output Operator
-              key expressions: _col13 (type: string)
-              sort order: +
-              Map-reduce partition columns: _col13 (type: string)
-              Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string)
       Reduce Operator Tree:
         Join Operator
           condition map:
diff --git a/ql/src/test/results/clientpositive/join_cond_pushdown_unqual2.q.out b/ql/src/test/results/clientpositive/join_cond_pushdown_unqual2.q.out
index d952a97..922e6d5 100644
--- a/ql/src/test/results/clientpositive/join_cond_pushdown_unqual2.q.out
+++ b/ql/src/test/results/clientpositive/join_cond_pushdown_unqual2.q.out
@@ -69,7 +69,7 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
           TableScan
-            alias: p4
+            alias: p1
             Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: p_name is not null (type: boolean)
@@ -81,7 +81,7 @@ STAGE PLANS:
               Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
               value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
           TableScan
-            alias: p1
+            alias: p4
             Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: p_name is not null (type: boolean)
@@ -93,29 +93,29 @@ STAGE PLANS:
               Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
               value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
           TableScan
-            alias: p3
+            alias: p2
             Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
             Filter Operator
-              predicate: p3_name is not null (type: boolean)
+              predicate: p2_name is not null (type: boolean)
               Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
               Reduce Output Operator
-                key expressions: p3_name (type: string)
+                key expressions: p2_name (type: string)
                 sort order: +
-                Map-reduce partition columns: p3_name (type: string)
+                Map-reduce partition columns: p2_name (type: string)
                 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                value expressions: p3_partkey (type: int), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string)
+                value expressions: p2_partkey (type: int), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string)
           TableScan
-            alias: p2
+            alias: p3
             Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
             Filter Operator
-              predicate: p2_name is not null (type: boolean)
+              predicate: p3_name is not null (type: boolean)
               Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
               Reduce Output Operator
-                key expressions: p2_name (type: string)
+                key expressions: p3_name (type: string)
                 sort order: +
-                Map-reduce partition columns: p2_name (type: string)
+                Map-reduce partition columns: p3_name (type: string)
                 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                value expressions: p2_partkey (type: int), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string)
+                value expressions: p3_partkey (type: int), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string)
       Reduce Operator Tree:
         Join Operator
           condition map:
@@ -156,28 +156,16 @@ from part p1 join part2 p2 join part3 p3 on p2_name = p1.p_name join part p4 on
 and p1.p_partkey = p2_partkey
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-3 is a root stage
-  Stage-2 depends on stages: Stage-3
-  Stage-1 depends on stages: Stage-2
-  Stage-0 depends on stages: Stage-1
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-3 depends on stages: Stage-2
+  Stage-0 depends on stages: Stage-3
 
 STAGE PLANS:
-  Stage: Stage-3
+  Stage: Stage-1
     Map Reduce
       Map Operator Tree:
          TableScan
-            alias: p2
-            Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-            Filter Operator
-              predicate: (p2_name is not null and p2_partkey is not null) (type: boolean)
-              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-              Reduce Output Operator
-                key expressions: p2_name (type: string), p2_partkey (type: int)
-                sort order: ++
-                Map-reduce partition columns: p2_name (type: string), p2_partkey (type: int)
-                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                value expressions: p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string)
-          TableScan
             alias: p1
             Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
@@ -189,6 +177,18 @@ STAGE PLANS:
                 Map-reduce partition columns: p_name (type: string), p_partkey (type: int)
                 Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE
                 value expressions: p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
+          TableScan
+            alias: p2
+            Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+            Filter Operator
+              predicate: (p2_name is not null and p2_partkey is not null) (type: boolean)
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+              Reduce Output Operator
+                key expressions: p2_name (type: string), p2_partkey (type: int)
+                sort order: ++
+                Map-reduce partition columns: p2_name (type: string), p2_partkey (type: int)
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                value expressions: p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string)
       Reduce Operator Tree:
         Join Operator
           condition map:
@@ -209,6 +209,13 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
          TableScan
+            Reduce Output Operator
+              key expressions: _col13 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col13 (type: string)
+              Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string)
+          TableScan
             alias: p3
             Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
             Filter Operator
@@ -220,13 +227,6 @@ STAGE PLANS:
               Map-reduce partition columns: p3_name (type: string)
               Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
               value expressions: p3_partkey (type: int), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string)
-          TableScan
-            Reduce Output Operator
-              key expressions: _col13 (type: string)
-              sort order: +
-              Map-reduce partition columns: _col13 (type: string)
-              Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string)
       Reduce Operator Tree:
         Join Operator
           condition map:
@@ -243,10 +243,17 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
-  Stage: Stage-1
+  Stage: Stage-3
     Map Reduce
       Map Operator Tree:
          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: int)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: int)
+              Statistics: Num rows: 7 Data size: 1024 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string)
+          TableScan
             alias: p4
             Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
@@ -258,13 +265,6 @@ STAGE PLANS:
               Map-reduce partition columns: p_partkey (type: int)
               Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
               value expressions: p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
-          TableScan
-            Reduce Output Operator
-              key expressions: _col0 (type: int)
-              sort order: +
-              Map-reduce partition columns: _col0 (type: int)
-              Statistics: Num rows: 7 Data size: 1024 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string)
       Reduce Operator Tree:
         Join Operator
           condition map:
diff --git a/ql/src/test/results/clientpositive/join_cond_pushdown_unqual3.q.out b/ql/src/test/results/clientpositive/join_cond_pushdown_unqual3.q.out
index d9121a6..72a6a24 100644
--- a/ql/src/test/results/clientpositive/join_cond_pushdown_unqual3.q.out
+++ b/ql/src/test/results/clientpositive/join_cond_pushdown_unqual3.q.out
@@ -71,17 +71,17 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
           TableScan
-            alias: p3
-            Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+            alias: p1
+            Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: p3_name is not null (type: boolean)
-              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+              predicate: p_name is not null (type: boolean)
+              Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
              Reduce Output Operator
-                key expressions: p3_name (type: string)
+                key expressions: p_name (type: string)
                 sort order: +
-                Map-reduce partition columns: p3_name (type: string)
-                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                value expressions: p3_partkey (type: int), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string)
+                Map-reduce partition columns: p_name (type: string)
+                Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+                value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
           TableScan
             alias: p2
             Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
@@ -95,17 +95,17 @@ STAGE PLANS:
                 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                 value expressions: p2_partkey (type: int), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string)
           TableScan
-            alias: p1
-            Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+            alias: p3
+            Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
             Filter Operator
-              predicate: p_name is not null (type: boolean)
-              Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+              predicate: p3_name is not null (type: boolean)
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
              Reduce Output Operator
-                key expressions: p_name (type: string)
+                key expressions: p3_name (type: string)
                 sort order: +
-                Map-reduce partition columns: p_name (type: string)
-                Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
-                value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
+                Map-reduce partition columns: p3_name (type: string)
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                value expressions: p3_partkey (type: int), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string)
       Reduce Operator Tree:
         Join Operator
           condition map:
@@ -155,17 +155,17 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
           TableScan
-            alias: p3
-            Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+            alias: p1
+            Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: p3_name is not null (type: boolean)
-              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+              predicate: p_name is not null (type: boolean)
+              Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
              Reduce Output Operator
-                key expressions: p3_name (type: string)
+                key expressions: p_name (type: string)
                 sort order: +
-                Map-reduce partition columns: p3_name (type: string)
-                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                value expressions: p3_partkey (type: int), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string)
+                Map-reduce partition columns: p_name (type: string)
+                Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+                value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
           TableScan
             alias: p2
             Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
@@ -179,17 +179,17 @@ STAGE PLANS:
                 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                 value expressions: p2_partkey (type: int), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string)
           TableScan
-            alias: p1
-            Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+            alias: p3
+            Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
             Filter Operator
-              predicate: p_name is not null (type: boolean)
-              Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+              predicate: p3_name is not null (type: boolean)
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
              Reduce Output Operator
-                key expressions: p_name (type: string)
+                key expressions: p3_name (type: string)
                 sort order: +
-                Map-reduce partition columns: p_name (type: string)
-                Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
-                value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
+                Map-reduce partition columns: p3_name (type: string)
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                value expressions: p3_partkey (type: int), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string)
       Reduce Operator Tree:
         Join Operator
           condition map:
@@ -222,7 +222,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-Warning: Shuffle Join JOIN[5][tables = [p1, p2]] in Stage 'Stage-2:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[5][tables = [p1, p2]] in Stage 'Stage-1:MAPRED' is a cross product
 PREHOOK: query: explain select *
 from part p1 join part2 p2 join part3 p3 where p2_partkey + p1.p_partkey =
 p1.p_partkey and p3_name = p2_name
 PREHOOK: type: QUERY
@@ -232,15 +232,22 @@ from part p1 join part2 p2 join part3 p3 where p2_partkey + p1.p_partkey =
 p1.p_partkey and p3_name = p2_name
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-2 is a root stage
-  Stage-1 depends on stages: Stage-2
-  Stage-0 depends on stages: Stage-1
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
 
 STAGE PLANS:
-  Stage: Stage-2
+  Stage: Stage-1
     Map Reduce
       Map Operator Tree:
          TableScan
+            alias: p1
+            Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+              value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
+          TableScan
             alias: p2
             Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
             Filter Operator
@@ -250,13 +257,6 @@ STAGE PLANS:
               sort order: 
               Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
               value expressions: p2_partkey (type: int), p2_name (type: string), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string)
-          TableScan
-            alias: p1
-            Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
-            Reduce Output Operator
-              sort order: 
-              Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
-              value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
       Reduce Operator Tree:
         Join Operator
           condition map:
@@ -276,10 +276,17 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
-  Stage: Stage-1
+  Stage: Stage-2
     Map Reduce
       Map Operator Tree:
          TableScan
+            Reduce Output Operator
+              key expressions: _col13 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col13 (type: string)
+              Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string)
+          TableScan
             alias: p3
             Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
             Filter Operator
@@ -291,13 +298,6 @@ STAGE PLANS:
               Map-reduce partition columns: p3_name (type: string)
              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
               value expressions: p3_partkey (type: int), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string)
-          TableScan
-            Reduce Output Operator
-              key expressions: _col13 (type: string)
-              sort order: +
-              Map-reduce partition columns: _col13 (type: string)
-              Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string)
       Reduce Operator Tree:
         Join Operator
           condition map:
@@ -328,7 +328,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-Warning: Shuffle Join JOIN[5][tables = [p1, p2]] in Stage 'Stage-2:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[5][tables = [p1, p2]] in Stage 'Stage-1:MAPRED' is a cross product
 PREHOOK: query: explain select *
 from part p1 join part2 p2 join part3 p3 where p2_partkey = 1
 and p3_name = p2_name
 PREHOOK: type: QUERY
@@ -338,15 +338,22 @@ from part p1 join part2 p2 join part3 p3 where p2_partkey = 1
 and p3_name = p2_name
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-2 is a root stage
-  Stage-1 depends on stages: Stage-2
-  Stage-0 depends on stages: Stage-1
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
 
 STAGE PLANS:
-  Stage: Stage-2
+  Stage: Stage-1
     Map Reduce
       Map Operator Tree:
          TableScan
+            alias: p1
+            Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+              value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
+          TableScan
             alias: p2
             Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
             Filter Operator
@@ -356,13 +363,6 @@ STAGE PLANS:
               sort order: 
               Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
               value expressions: p2_name (type: string), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string)
-          TableScan
-            alias: p1
-            Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
-            Reduce Output Operator
-              sort order: 
-              Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
-              value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
       Reduce Operator Tree:
         Join Operator
           condition map:
@@ -379,10 +379,17 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
-  Stage: Stage-1
+  Stage: Stage-2
     Map Reduce
       Map Operator Tree:
          TableScan
+            Reduce Output Operator
+              key expressions: _col13 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col13 (type: string)
+              Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string)
+          TableScan
             alias: p3
             Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
             Filter Operator
@@ -394,13 +401,6 @@ STAGE PLANS:
               Map-reduce partition columns: p3_name (type: string)
               Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
               value expressions: p3_partkey (type: int), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string)
-          TableScan
-            Reduce Output Operator
-              key expressions: _col13 (type: string)
-              sort order: +
-              Map-reduce partition columns: _col13 (type: string)
-              Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string)
       Reduce Operator Tree:
         Join Operator
           condition map:
diff --git a/ql/src/test/results/clientpositive/join_cond_pushdown_unqual4.q.out b/ql/src/test/results/clientpositive/join_cond_pushdown_unqual4.q.out
index ce70b35..f751ce0 100644
--- a/ql/src/test/results/clientpositive/join_cond_pushdown_unqual4.q.out
+++ b/ql/src/test/results/clientpositive/join_cond_pushdown_unqual4.q.out
@@ -71,7 +71,7 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
           TableScan
-            alias: p4
+            alias: p1
             Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: p_name is not null (type: boolean)
@@ -83,7 +83,7 @@ STAGE PLANS:
               Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
               value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
           TableScan
-            alias: p1
+            alias: p4
             Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: p_name is not null (type: boolean)
@@ -95,29 +95,29 @@ STAGE PLANS:
               Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
               value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
           TableScan
-            alias: p3
+            alias: p2
             Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
             Filter Operator
-              predicate: p3_name is not null (type: boolean)
+              predicate: p2_name is not null (type: boolean)
               Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
               Reduce Output Operator
-                key expressions: p3_name (type: string)
+                key expressions: p2_name (type: string)
                 sort order: +
-                Map-reduce partition columns: p3_name (type: string)
+                Map-reduce partition columns: p2_name (type: string)
                 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                value expressions: p3_partkey (type: int), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string)
+                value expressions: p2_partkey (type: int), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string)
           TableScan
-            alias: p2
+            alias: p3
             Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
             Filter Operator
-              predicate: p2_name is not null (type: boolean)
+              predicate: p3_name is not null (type: boolean)
               Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
               Reduce Output Operator
-                key expressions: p2_name (type: string)
+                key expressions: p3_name (type: string)
                 sort order: +
-                Map-reduce partition columns: p2_name (type: string)
+                Map-reduce partition columns: p3_name (type: string)
                 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                value expressions: p2_partkey (type: int), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string)
+                value expressions: p3_partkey (type: int), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string)
       Reduce Operator Tree:
         Join Operator
           condition map:
@@ -163,28 +163,16 @@ where p2_name = p3_name and p1.p_partkey = p4.p_partkey
 and p1.p_partkey = p2_partkey
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-3 is a root stage
-  Stage-2 depends on stages: Stage-3
-  Stage-1 depends on stages: Stage-2
-  Stage-0 depends on stages: Stage-1
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-3 depends on stages: Stage-2
+  Stage-0 depends on stages: Stage-3
 
 STAGE PLANS:
-  Stage: Stage-3
+  Stage: Stage-1
     Map Reduce
       Map Operator Tree:
          TableScan
-            alias: p2
-            Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-            Filter Operator
-              predicate: (p2_name is not null and p2_partkey is not null) (type: boolean)
-              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-              Reduce Output Operator
-                key expressions: p2_name (type: string), p2_partkey (type: int)
-                sort order: ++
-                Map-reduce partition columns: p2_name (type: string), p2_partkey (type: int)
-                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                value expressions: p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string)
-          TableScan
             alias: p1
             Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
@@ -196,6 +184,18 @@ STAGE PLANS:
                 Map-reduce partition columns: p_name (type: string), p_partkey (type: int)
                 Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE
                 value expressions: p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
+          TableScan
+            alias: p2
+            Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+            Filter Operator
+              predicate: (p2_name is not null and p2_partkey is not null) (type: boolean)
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+              Reduce Output Operator
+                key expressions: p2_name (type: string), p2_partkey (type: int)
+                sort order: ++
+                Map-reduce partition columns: p2_name (type: string), p2_partkey (type: int)
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                value expressions: p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string)
       Reduce Operator Tree:
         Join Operator
           condition map:
@@ -216,6 +216,13 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
          TableScan
+            Reduce Output Operator
+              key expressions: _col13 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col13 (type: string)
+              Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string)
+          TableScan
             alias: p3
             Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
             Filter Operator
@@ -227,13 +234,6 @@ STAGE PLANS:
               Map-reduce partition columns: p3_name (type: string)
               Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
               value expressions: p3_partkey (type: int), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string)
-          TableScan
-            Reduce Output Operator
-              key expressions: _col13 (type: string)
-              sort order: +
-              Map-reduce partition columns: _col13 (type: string)
-              Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string)
       Reduce Operator Tree:
         Join Operator
           condition map:
@@ -250,10 +250,17 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
-  Stage: Stage-1
+  Stage: Stage-3
     Map Reduce
       Map Operator Tree:
          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: int)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: int)
+              Statistics: Num rows: 7 Data size: 1024 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string)
+          TableScan
             alias: p4
             Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
@@ -265,13 +272,6 @@ STAGE PLANS:
               Map-reduce partition columns: p_partkey (type: int)
               Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
               value expressions: p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
-          TableScan
-            Reduce Output Operator
-              key expressions: _col0 (type: int)
-              sort order: +
-              Map-reduce partition columns: _col0 (type: int)
-              Statistics: Num rows: 7 Data size: 1024 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string)
       Reduce Operator Tree:
         Join Operator
           condition map:
diff --git a/ql/src/test/results/clientpositive/join_filters_overlap.q.out b/ql/src/test/results/clientpositive/join_filters_overlap.q.out
index 51b4089..14a63cd 100644
--- a/ql/src/test/results/clientpositive/join_filters_overlap.q.out
+++ b/ql/src/test/results/clientpositive/join_filters_overlap.q.out
@@ -101,6 +101,18 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
          TableScan
+            alias: a
+            Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+            GatherStats: false
+            Reduce Output Operator
+              key expressions: key (type: int)
+              sort order: +
+              Map-reduce partition columns: key (type: int)
+              Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats:
NONE + tag: 0 + value expressions: value (type: int) + auto parallelism: false + TableScan alias: b Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE GatherStats: false @@ -132,18 +144,6 @@ STAGE PLANS: tag: 2 value expressions: value (type: int) auto parallelism: false - TableScan - alias: a - Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: int) - auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -192,7 +192,7 @@ STAGE PLANS: name: default.a name: default.a Truncated Path -> Alias: - /a [b, c, a] + /a [a, b, c] Needs Tagging: true Reduce Operator Tree: Join Operator @@ -352,6 +352,22 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (value = 50) (type: boolean) + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: int) + auto parallelism: false + TableScan alias: b Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE GatherStats: false @@ -379,22 +395,6 @@ STAGE PLANS: tag: 2 value expressions: value (type: int) auto parallelism: false - TableScan - alias: a - Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (value = 50) (type: boolean) - Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: int) - auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -443,7 +443,7 @@ STAGE PLANS: name: default.a name: default.a Truncated Path -> Alias: - /a [b, c, a] + /a [a, b, c] Needs Tagging: true Reduce Operator Tree: Join Operator @@ -617,6 +617,22 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (value = 50) (type: boolean) + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: int) + auto parallelism: false + TableScan alias: b Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE GatherStats: false @@ -644,22 +660,6 @@ STAGE PLANS: tag: 2 value expressions: value (type: int) auto parallelism: false - TableScan - alias: a - Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column 
stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (value = 50) (type: boolean) - Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: int) - auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -708,7 +708,7 @@ STAGE PLANS: name: default.a name: default.a Truncated Path -> Alias: - /a [b, c, a] + /a [a, b, c] Needs Tagging: true Reduce Operator Tree: Join Operator @@ -896,21 +896,17 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: d + alias: a Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (value = 40) (type: boolean) - Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE - tag: 3 - value expressions: value (type: int) - auto parallelism: false + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: int) + auto parallelism: false TableScan alias: b Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE @@ -940,17 +936,21 @@ STAGE PLANS: value expressions: value (type: int) auto parallelism: false TableScan - alias: a + alias: d Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: int) - auto parallelism: false + Filter Operator + isSamplingPred: false + predicate: (value = 40) (type: boolean) + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + tag: 3 + value expressions: value (type: int) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -999,7 +999,7 @@ STAGE PLANS: name: default.a name: default.a Truncated Path -> Alias: - /a [d, b, c, a] + /a [a, b, c, d] Needs Tagging: true Reduce Operator Tree: Join Operator @@ -1182,65 +1182,65 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: d + alias: a + Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: int) + auto parallelism: false + TableScan + alias: b Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator 
isSamplingPred: false
- predicate: (value = 40) (type: boolean)
+ predicate: (value = 50) (type: boolean)
Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: key (type: int)
sort order: +
Map-reduce partition columns: key (type: int)
Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
- tag: 3
+ tag: 1
value expressions: value (type: int)
auto parallelism: false
TableScan
- alias: b
+ alias: c
Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: (value = 50) (type: boolean)
+ predicate: (value = 60) (type: boolean)
Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: key (type: int)
sort order: +
Map-reduce partition columns: key (type: int)
Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
- tag: 1
+ tag: 2
value expressions: value (type: int)
auto parallelism: false
TableScan
- alias: c
+ alias: d
Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
- predicate: (value = 60) (type: boolean)
+ predicate: (value = 40) (type: boolean)
Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: key (type: int)
sort order: +
Map-reduce partition columns: key (type: int)
Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
- tag: 2
+ tag: 3
value expressions: value (type: int)
auto parallelism: false
- TableScan
- alias: a
- Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE
- tag: 0
- value expressions: value (type: int)
- auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -1289,7 +1289,7 @@ STAGE PLANS:
name: default.a
name: default.a
Truncated Path -> Alias:
- /a [d, b, c, a]
+ /a [a, b, c, d]
Needs Tagging: true
Reduce Operator Tree:
Join Operator
diff --git a/ql/src/test/results/clientpositive/join_map_ppr.q.out b/ql/src/test/results/clientpositive/join_map_ppr.q.out
index 96c3503..d5cc08b 100644
--- a/ql/src/test/results/clientpositive/join_map_ppr.q.out
+++ b/ql/src/test/results/clientpositive/join_map_ppr.q.out
@@ -106,13 +106,13 @@ TOK_QUERY
STAGE DEPENDENCIES:
Stage-10 is a root stage
Stage-1 depends on stages: Stage-10
- Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
- Stage-4
- Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
- Stage-2 depends on stages: Stage-0
- Stage-3
+ Stage-9 depends on stages: Stage-1 , consists of Stage-6, Stage-5, Stage-7
+ Stage-6
+ Stage-0 depends on stages: Stage-6, Stage-5, Stage-8
+ Stage-4 depends on stages: Stage-0
Stage-5
- Stage-6 depends on stages: Stage-5
+ Stage-7
+ Stage-8 depends on stages: Stage-7
STAGE PLANS:
Stage: Stage-10
@@ -274,10 +274,10 @@ STAGE PLANS:
Truncated Path -> Alias:
/srcpart/ds=2008-04-08/hr=11 [z]
- Stage: Stage-7
+ Stage: Stage-9
Conditional Operator
- Stage: Stage-4
+ Stage: Stage-6
Move Operator
files:
hdfs directory: true
@@ -305,11 +305,11 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest_j1
- Stage: Stage-2
+ Stage: Stage-4
Stats-Aggr Operator
#### A masked pattern was here ####
- Stage: Stage-3
+ Stage: Stage-5
Map Reduce
Map Operator Tree:
TableScan
@@ -378,7 +378,7 @@ STAGE PLANS:
Truncated Path -> Alias:
#### A masked pattern was here ####
- Stage: Stage-5
+ Stage: Stage-7
Map Reduce
Map Operator Tree:
TableScan
@@ -447,7 +447,7 @@ STAGE PLANS:
Truncated Path -> Alias:
#### A masked pattern was here ####
- Stage: Stage-6
+ Stage: Stage-8
Move Operator
files:
hdfs directory: true
@@ -725,13 +725,13 @@ TOK_QUERY
STAGE DEPENDENCIES:
Stage-10 is a root stage
Stage-1 depends on stages: Stage-10
- Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
- Stage-4
- Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
- Stage-2 depends on stages: Stage-0
- Stage-3
+ Stage-9 depends on stages: Stage-1 , consists of Stage-6, Stage-5, Stage-7
+ Stage-6
+ Stage-0 depends on stages: Stage-6, Stage-5, Stage-8
+ Stage-4 depends on stages: Stage-0
Stage-5
- Stage-6 depends on stages: Stage-5
+ Stage-7
+ Stage-8 depends on stages: Stage-7
STAGE PLANS:
Stage: Stage-10
@@ -898,10 +898,10 @@ STAGE PLANS:
Truncated Path -> Alias:
/srcpart/ds=2008-04-08/hr=11 [z]
- Stage: Stage-7
+ Stage: Stage-9
Conditional Operator
- Stage: Stage-4
+ Stage: Stage-6
Move Operator
files:
hdfs directory: true
@@ -934,11 +934,11 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest_j1
- Stage: Stage-2
+ Stage: Stage-4
Stats-Aggr Operator
#### A masked pattern was here ####
- Stage: Stage-3
+ Stage: Stage-5
Map Reduce
Map Operator Tree:
TableScan
@@ -1022,7 +1022,7 @@ STAGE PLANS:
Truncated Path -> Alias:
#### A masked pattern was here ####
- Stage: Stage-5
+ Stage: Stage-7
Map Reduce
Map Operator Tree:
TableScan
@@ -1106,7 +1106,7 @@ STAGE PLANS:
Truncated Path -> Alias:
#### A masked pattern was here ####
- Stage: Stage-6
+ Stage: Stage-8
Move Operator
files:
hdfs directory: true
diff --git a/ql/src/test/results/clientpositive/join_merge_multi_expressions.q.out b/ql/src/test/results/clientpositive/join_merge_multi_expressions.q.out
index b3f1e09..a06c86c 100644
--- a/ql/src/test/results/clientpositive/join_merge_multi_expressions.q.out
+++ b/ql/src/test/results/clientpositive/join_merge_multi_expressions.q.out
@@ -14,7 +14,7 @@ STAGE PLANS:
Map Reduce
Map Operator Tree:
TableScan
- alias: b
+ alias: a
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
@@ -25,7 +25,7 @@ STAGE PLANS:
Map-reduce partition columns: key (type: string), hr (type: string)
Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
TableScan
- alias: c
+ alias: b
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
@@ -36,7 +36,7 @@ STAGE PLANS:
Map-reduce partition columns: key (type: string), hr (type: string)
Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
TableScan
- alias: a
+ alias: c
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
@@ -56,19 +56,17 @@ STAGE PLANS:
1
2
Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-2
Map Reduce
diff --git a/ql/src/test/results/clientpositive/join_merging.q.out b/ql/src/test/results/clientpositive/join_merging.q.out
index df1daf4..f4a43f3 100644
--- a/ql/src/test/results/clientpositive/join_merging.q.out
+++ b/ql/src/test/results/clientpositive/join_merging.q.out
@@ -17,34 +17,34 @@ STAGE PLANS:
Map Reduce
Map Operator Tree:
TableScan
- alias: p3
+ alias: p1
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (p_size > 10) (type: boolean)
+ Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: p_partkey (type: int)
+ sort order: +
+ Map-reduce partition columns: p_partkey (type: int)
+ Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+ value expressions: p_size (type: int)
+ TableScan
+ alias: p2
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: p_partkey (type: int)
sort order: +
Map-reduce partition columns: p_partkey (type: int)
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ value expressions: p_size (type: int)
TableScan
- alias: p2
+ alias: p3
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: p_partkey (type: int)
sort order: +
Map-reduce partition columns: p_partkey (type: int)
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- value expressions: p_size (type: int)
- TableScan
- alias: p1
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (p_size > 10) (type: boolean)
- Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: p_partkey (type: int)
- sort order: +
- Map-reduce partition columns: p_partkey (type: int)
- Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE
- value expressions: p_size (type: int)
Reduce Operator Tree:
Join Operator
condition map:
@@ -93,34 +93,34 @@ STAGE PLANS:
Map Reduce
Map Operator Tree:
TableScan
- alias: p3
+ alias: p1
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (p_size > 10) (type: boolean)
+ Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: p_partkey (type: int)
+ sort order: +
+ Map-reduce partition columns: p_partkey (type: int)
+ Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+ value expressions: p_size (type: int)
+ TableScan
+ alias: p2
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: p_partkey (type: int)
sort order: +
Map-reduce partition columns: p_partkey (type: int)
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ value expressions: p_size (type: int)
TableScan
- alias: p2
+ alias: p3
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: p_partkey (type: int)
sort order: +
Map-reduce partition columns: p_partkey (type: int)
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- value expressions: p_size (type: int)
- TableScan
- alias: p1
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (p_size > 10) (type: boolean)
- Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: p_partkey (type: int)
- sort order: +
- Map-reduce partition columns: p_partkey (type: int)
- Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE
- value expressions: p_size (type: int)
Reduce Operator Tree:
Join Operator
condition map:
diff --git a/ql/src/test/results/clientpositive/join_nullsafe.q.out b/ql/src/test/results/clientpositive/join_nullsafe.q.out
index d56a04d..817614f 100644
--- a/ql/src/test/results/clientpositive/join_nullsafe.q.out
+++ b/ql/src/test/results/clientpositive/join_nullsafe.q.out
@@ -33,23 +33,23 @@ STAGE PLANS:
Map Reduce
Map Operator Tree:
TableScan
- alias: b
+ alias: a
Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: value (type: int)
+ key expressions: key (type: int)
sort order: +
- Map-reduce partition columns: value (type: int)
+ Map-reduce partition columns: key (type: int)
Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE
- value expressions: key (type: int)
+ value expressions: value (type: int)
TableScan
- alias: a
+ alias: b
Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: key (type: int)
+ key expressions: value (type: int)
sort order: +
- Map-reduce partition columns: key (type: int)
+ Map-reduce partition columns: value (type: int)
Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE
- value expressions: value (type: int)
+ value expressions: key (type: int)
Reduce Operator Tree:
Join Operator
condition map:
@@ -110,31 +110,31 @@ STAGE PLANS:
Map Reduce
Map Operator Tree:
TableScan
- alias: b
+ alias: a
Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: value is not null (type: boolean)
+ predicate: key is not null (type: boolean)
Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: value (type: int)
+ key expressions: key (type: int)
sort order: +
- Map-reduce partition columns: value (type: int)
+ Map-reduce partition columns: key (type: int)
Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
- value expressions: key (type: int)
+ value expressions: value (type: int)
TableScan
- alias: c
+ alias: b
Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: key is not null (type: boolean)
+ predicate: value is not null (type: boolean)
Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: key (type: int)
+ key expressions: value (type: int)
sort order: +
- Map-reduce partition columns: key (type: int)
+ Map-reduce partition columns: value (type: int)
Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
- value expressions: value (type: int)
+ value expressions: key (type: int)
TableScan
- alias: a
+ alias: c
Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
@@ -197,25 +197,25 @@ STAGE PLANS:
Map Reduce
Map Operator Tree:
TableScan
- alias: b
+ alias: a
Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: value (type: int)
+ key expressions: key (type: int)
sort order: +
- Map-reduce partition columns: value (type: int)
+ Map-reduce partition columns: key (type: int)
Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE
- value expressions: key (type: int)
+ value expressions: value (type: int)
TableScan
- alias: c
+ alias: b
Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: key (type: int)
+ key expressions: value (type: int)
sort order: +
- Map-reduce partition columns: key (type: int)
+ Map-reduce partition columns: value (type: int)
Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE
- value expressions: value (type: int)
+ value expressions: key (type: int)
TableScan
- alias: a
+ alias: c
Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: key (type: int)
@@ -303,29 +303,29 @@ STAGE PLANS:
Map Reduce
Map Operator Tree:
TableScan
- alias: b
+ alias: a
Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: key is not null (type: boolean)
+ predicate: value is not null (type: boolean)
Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: value (type: int), key (type: int)
+ key expressions: key (type: int), value (type: int)
sort order: ++
- Map-reduce partition columns: value (type: int), key (type: int)
+ Map-reduce partition columns: key (type: int), value (type: int)
Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
TableScan
- alias: c
+ alias: b
Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: value is not null (type: boolean)
+ predicate: key is not null (type: boolean)
Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: key (type: int), value (type: int)
+ key expressions: value (type: int), key (type: int)
sort order: ++
- Map-reduce partition columns: key (type: int), value (type: int)
+ Map-reduce partition columns: value (type: int), key (type: int)
Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE
TableScan
- alias: a
+ alias: c
Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: value is not null (type: boolean)
@@ -388,23 +388,23 @@ STAGE PLANS:
Map Reduce
Map Operator Tree:
TableScan
- alias: b
+ alias: a
Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: value (type: int), key (type: int)
+ key expressions: key (type: int), value (type: int)
sort order: ++
- Map-reduce partition columns: value (type: int), key (type: int)
+ Map-reduce partition columns: key (type: int), value (type: int)
Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE
TableScan
- alias: c
+ alias: b
Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: key (type: int), value (type: int)
+ key expressions: value (type: int), key (type: int)
sort order: ++
- Map-reduce partition columns: key (type: int), value (type: int)
+ Map-reduce partition columns: value (type: int), key (type: int)
Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE
TableScan
- alias: a
+ alias: c
Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: key (type: int), value (type: int)
@@ -1517,27 +1517,27 @@ STAGE PLANS:
Map Reduce
Map Operator Tree:
TableScan
- alias: b
+ alias: a
Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: value is null (type: boolean)
+ predicate: key is null (type: boolean)
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: null (type: void)
sort order: +
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: key (type: int)
+ value expressions: value (type: int)
TableScan
- alias: a
+ alias: b
Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: key is null (type: boolean)
+ predicate: value is null (type: boolean)
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: null (type: void)
sort order: +
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: value (type: int)
+ value expressions: key (type: int)
Reduce Operator Tree:
Join Operator
condition map:
diff --git a/ql/src/test/results/clientpositive/join_rc.q.out b/ql/src/test/results/clientpositive/join_rc.q.out
index 515919d..9e17b0d 100644
--- a/ql/src/test/results/clientpositive/join_rc.q.out
+++ b/ql/src/test/results/clientpositive/join_rc.q.out
@@ -55,7 +55,7 @@ STAGE PLANS:
Map Reduce
Map Operator Tree:
TableScan
- alias: join_rc2
+ alias: join_rc1
Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
@@ -65,9 +65,8 @@ STAGE PLANS:
sort order: +
Map-reduce partition columns: key (type: string)
Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE
- value expressions: value (type: string)
TableScan
- alias: join_rc1
+ alias: join_rc2
Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
@@ -77,6 +76,7 @@ STAGE PLANS:
sort order: +
Map-reduce partition columns: key (type: string)
Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE
+ value expressions: value (type: string)
Reduce Operator Tree:
Join Operator
condition map:
diff --git a/ql/src/test/results/clientpositive/join_reorder.q.out b/ql/src/test/results/clientpositive/join_reorder.q.out
index 9daab54..713ae74 100644
--- a/ql/src/test/results/clientpositive/join_reorder.q.out
+++ b/ql/src/test/results/clientpositive/join_reorder.q.out
@@ -65,18 +65,6 @@ STAGE PLANS:
Map Reduce
Map Operator Tree:
TableScan
- alias: c
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (key + 1) is not null (type: boolean)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: (key + 1) (type: double)
- sort order: +
- Map-reduce partition columns: (key + 1) (type: double)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- value expressions: key (type: string)
- TableScan
alias: a
Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
Filter Operator
@@ -88,6 +76,18 @@ STAGE PLANS:
Map-reduce partition columns: UDFToDouble(key) (type: double)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
value expressions: key (type: string), val (type: string)
+ TableScan
+ alias: c
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key + 1) is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: (key + 1) (type: double)
+ sort order: +
+ Map-reduce partition columns: (key + 1) (type: double)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: key (type: string)
Reduce Operator Tree:
Join Operator
condition map:
@@ -130,18 +130,6 @@ STAGE PLANS:
Map Reduce
Map Operator Tree:
TableScan
- alias: c
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (key + 1) is not null (type: boolean)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: (key + 1) (type: double)
- sort order: +
- Map-reduce partition columns: (key + 1) (type: double)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- value expressions: key (type: string)
- TableScan
alias: a
Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
Filter Operator
@@ -153,6 +141,18 @@ STAGE PLANS:
Map-reduce partition columns: UDFToDouble(key) (type: double)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
value expressions: key (type: string), val (type: string)
+ TableScan
+ alias: c
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key + 1) is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: (key + 1) (type: double)
+ sort order: +
+ Map-reduce partition columns: (key + 1) (type: double)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: key (type: string)
Reduce Operator Tree:
Join Operator
condition map:
@@ -232,22 +232,22 @@ STAGE PLANS:
Map Reduce
Map Operator Tree:
TableScan
- alias: b
+ alias: a
Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
Reduce Output Operator
key expressions: key (type: string)
sort order: +
Map-reduce partition columns: key (type: string)
Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ value expressions: val (type: string)
TableScan
- alias: a
+ alias: b
Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
Reduce Output Operator
key expressions: key (type: string)
sort order: +
Map-reduce partition columns: key (type: string)
Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
- value expressions: val (type: string)
Reduce Operator Tree:
Join Operator
condition map:
@@ -329,22 +329,22 @@ STAGE PLANS:
Map Reduce
Map Operator Tree:
TableScan
- alias: b
+ alias: a
Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
Reduce Output Operator
key expressions: key (type: string)
sort order: +
Map-reduce partition columns: key (type: string)
Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ value expressions: val (type: string)
TableScan
- alias: a
+ alias: b
Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
Reduce Output Operator
key expressions: key (type: string)
sort order: +
Map-reduce partition columns: key (type: string)
Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
- value expressions: val (type: string)
Reduce Operator Tree:
Join Operator
condition map:
@@ -471,7 +471,7 @@ STAGE PLANS:
Map Reduce
Map Operator Tree:
TableScan
- alias: b
+ alias: a
Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
Reduce Output Operator
key expressions: key (type: string), val (type: string)
@@ -479,21 +479,21 @@ STAGE PLANS:
Map-reduce partition columns: key (type: string), val (type: string)
Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
TableScan
- alias: c
- Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE
+ alias: b
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
Reduce Output Operator
key expressions: key (type: string), val (type: string)
sort order: ++
Map-reduce partition columns: key (type: string), val (type: string)
- Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
TableScan
- alias: a
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ alias: c
+ Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE
Reduce Output Operator
key expressions: key (type: string), val (type: string)
sort order: ++
Map-reduce partition columns: key (type: string), val (type: string)
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE
Reduce Operator Tree:
Join Operator
condition map:
@@ -545,7 +545,7 @@ STAGE PLANS:
Map Reduce
Map Operator Tree:
TableScan
- alias: b
+ alias: a
Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
Reduce Output Operator
key expressions: key (type: string), val (type: string)
@@ -553,21 +553,21 @@ STAGE PLANS:
Map-reduce partition columns: key (type: string), val (type: string)
Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
TableScan
- alias: c
- Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE
+ alias: b
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
Reduce Output Operator
key expressions: key (type: string), val (type: string)
sort order: ++
Map-reduce partition columns: key (type: string), val (type: string)
- Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
TableScan
- alias: a
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ alias: c
+ Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE
Reduce Output Operator
key expressions: key (type: string), val (type: string)
sort order: ++
Map-reduce partition columns: key (type: string), val (type: string)
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE
Reduce Operator Tree:
Join Operator
condition map:
diff --git a/ql/src/test/results/clientpositive/join_reorder2.q.out b/ql/src/test/results/clientpositive/join_reorder2.q.out
index 9278ab5..efdac5c 100644
--- a/ql/src/test/results/clientpositive/join_reorder2.q.out
+++ b/ql/src/test/results/clientpositive/join_reorder2.q.out
@@ -83,7 +83,7 @@ STAGE PLANS:
Map Reduce
Map Operator Tree:
TableScan
- alias: d
+ alias: a
Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
@@ -119,7 +119,7 @@ STAGE PLANS:
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
value expressions: val (type: string)
TableScan
- alias: a
+ alias: d
Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
@@ -195,20 +195,20 @@ FROM T1 a JOIN T2 b ON a.key = b.key JOIN T4 d ON a.key + 1 = d.key + 1
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-2 is a root stage
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
Stage-3 depends on stages: Stage-2
- Stage-1 depends on stages: Stage-3
- Stage-0 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-3
STAGE PLANS:
- Stage: Stage-2
+ Stage: Stage-1
Map Reduce
Map Operator Tree:
TableScan
- alias: b
+ alias: a
Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
Filter Operator
- predicate: (key is not null and (key + 1) is not null) (type: boolean)
+ predicate: ((key is not null and val is not null) and (key + 1) is not null) (type: boolean)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Reduce Output Operator
key expressions: key (type: string)
@@ -217,10 +217,10 @@ STAGE PLANS:
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
value expressions: val (type: string)
TableScan
- alias: a
+ alias: b
Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
Filter Operator
- predicate: ((key is not null and val is not null) and (key + 1) is not null) (type: boolean)
+ predicate: (key is not null and (key + 1) is not null) (type: boolean)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Reduce Output Operator
key expressions: key (type: string)
@@ -244,7 +244,7 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
- Stage: Stage-3
+ Stage: Stage-2
Map Reduce
Map Operator Tree:
TableScan
@@ -282,10 +282,17 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
- Stage: Stage-1
+ Stage: Stage-3
Map Reduce
Map Operator Tree:
TableScan
+ Reduce Output Operator
+ key expressions: (_col0 + 1) (type: double)
+ sort order: +
+ Map-reduce partition columns: (_col0 + 1) (type: double)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string)
+ TableScan
alias: d
Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
Filter Operator
@@ -297,13 +304,6 @@ STAGE PLANS:
Map-reduce partition columns: (key + 1) (type: double)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
value expressions: key (type: string), val (type: string)
- TableScan
- Reduce Output Operator
- key expressions: (_col0 + 1) (type: double)
- sort order: +
- Map-reduce partition columns: (_col0 + 1) (type: double)
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string)
Reduce Operator Tree:
Join Operator
condition map:
diff --git a/ql/src/test/results/clientpositive/join_reorder3.q.out b/ql/src/test/results/clientpositive/join_reorder3.q.out
index 761b8b3..99ac232 100644
--- a/ql/src/test/results/clientpositive/join_reorder3.q.out
+++ b/ql/src/test/results/clientpositive/join_reorder3.q.out
@@ -83,7 +83,7 @@ STAGE PLANS:
Map Reduce
Map Operator Tree:
TableScan
- alias: d
+ alias: a
Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
@@ -119,7 +119,7 @@ STAGE PLANS:
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
value expressions: val (type: string)
TableScan
- alias: a
+ alias: d
Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
@@ -195,20 +195,20 @@ FROM T1 a JOIN T2 b ON a.key = b.key JOIN T4 d ON a.key + 1 = d.key + 1
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-2 is a root stage
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
Stage-3 depends on stages: Stage-2
- Stage-1 depends on stages: Stage-3
- Stage-0 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-3
STAGE PLANS:
- Stage: Stage-2
+ Stage: Stage-1
Map Reduce
Map Operator Tree:
TableScan
- alias: b
+ alias: a
Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
Filter Operator
- predicate: (key is not null and (key + 1) is not null) (type: boolean)
+ predicate: ((key is not null and val is not null) and (key + 1) is not null) (type: boolean)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Reduce Output Operator
key expressions: key (type: string)
@@ -217,10 +217,10 @@ STAGE PLANS:
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
value expressions: val (type: string)
TableScan
- alias: a
+ alias: b
Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
Filter Operator
- predicate: ((key is not null and val is not null) and (key + 1) is not null) (type: boolean)
+ predicate: (key is not null and (key + 1) is not null) (type: boolean)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Reduce Output Operator
key expressions: key (type: string)
@@ -244,7 +244,7 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
- Stage: Stage-3
+ Stage: Stage-2
Map Reduce
Map Operator Tree:
TableScan
@@ -282,10 +282,17 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
- Stage: Stage-1
+ Stage: Stage-3
Map Reduce
Map Operator Tree:
TableScan
+ Reduce Output Operator
+ key expressions: (_col0 + 1) (type: double)
+ sort order: +
+ Map-reduce partition columns: (_col0 + 1) (type: double)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string)
+ TableScan
alias: d
Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
Filter Operator
@@ -297,13 +304,6 @@ STAGE PLANS:
Map-reduce partition columns: (key + 1) (type: double)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
value expressions: key (type: string), val (type: string)
- TableScan
- Reduce Output Operator
- key expressions: (_col0 + 1) (type: double)
- sort order: +
- Map-reduce partition columns: (_col0 + 1) (type: double)
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string)
Reduce Operator Tree:
Join Operator
condition map:
diff --git a/ql/src/test/results/clientpositive/join_star.q.out b/ql/src/test/results/clientpositive/join_star.q.out
index 94c3a8c..d214e00 100644
--- a/ql/src/test/results/clientpositive/join_star.q.out
+++ b/ql/src/test/results/clientpositive/join_star.q.out
@@ -223,12 +223,12 @@ PREHOOK: type: QUERY
POSTHOOK: query: explain select m1, m2, f2, f4 from fact join dim1 on fact.d1=dim1.f1 join dim2 on fact.d2=dim2.f3
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-6 is a root stage
- Stage-4 depends on stages: Stage-6
- Stage-0 depends on stages: Stage-4
+ Stage-7 is a root stage
+ Stage-5 depends on stages: Stage-7
+ Stage-0 depends on stages: Stage-5
STAGE PLANS:
- Stage: Stage-6
+ Stage: Stage-7
Map Reduce Local Work
Alias -> Map Local Tables:
dim1
@@ -267,7 +267,7 @@ STAGE PLANS:
0 _col3 (type: int)
1 f3 (type: int)
- Stage: Stage-4
+ Stage: Stage-5
Map Reduce
Map Operator Tree:
TableScan
@@ -345,12 +345,12 @@ PREHOOK: type: QUERY
POSTHOOK: query: explain select m1, m2, f2, f4 from fact join dim1 on fact.d1= dim1.f1 join dim2 on dim1.f2 = dim2.f3
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-6 is a root stage
- Stage-4 depends on stages: Stage-6
- Stage-0 depends on stages: Stage-4
+ Stage-7 is a root stage
+ Stage-5 depends on stages: Stage-7
+ Stage-0 depends on stages: Stage-5
STAGE PLANS:
- Stage: Stage-6
+ Stage: Stage-7
Map Reduce Local Work
Alias -> Map Local Tables:
dim1
@@ -389,7 +389,7 @@ STAGE PLANS:
0 _col8 (type: int)
1 f3 (type: int)
- Stage: Stage-4
+ Stage: Stage-5
Map Reduce
Map Operator Tree:
TableScan
@@ -467,12 +467,12 @@ PREHOOK: type: QUERY
POSTHOOK: query: explain select m1, m2, f2, f4 from fact Left outer join dim1 on fact.d1= dim1.f1 Left outer join dim2 on dim1.f2 = dim2.f3
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-6 is a root stage
- Stage-4 depends on stages: Stage-6
- Stage-0 depends on stages: Stage-4
+ Stage-7 is a root stage
+ Stage-5 depends on stages: Stage-7
+ Stage-0 depends on stages: Stage-5
STAGE PLANS:
- Stage: Stage-6
+ Stage: Stage-7
Map Reduce Local Work
Alias -> Map Local Tables:
dim1
@@ -505,7 +505,7 @@ STAGE PLANS:
0 _col8 (type: int)
1 f3 (type: int)
- Stage: Stage-4
+ Stage: Stage-5
Map Reduce
Map Operator Tree:
TableScan
@@ -596,12 +596,12 @@ POSTHOOK: query: explain Select m1, m2, f2, f4, f6, f8, f10, f12, f14
Left outer Join dim7 on dim6.f12 = dim7.f13
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-18 is a root stage
- Stage-12 depends on stages: Stage-18
- Stage-0 depends on stages: Stage-12
+ Stage-20 is a root stage
+ Stage-14 depends on stages: Stage-20
+ Stage-0 depends on stages: Stage-14
STAGE PLANS:
- Stage: Stage-18
+ Stage: Stage-20
Map Reduce Local Work
Alias -> Map Local Tables:
dim1
@@ -708,7 +708,7 @@ STAGE PLANS:
0 _col28 (type: int)
1 f13 (type: int)
- Stage: Stage-12
+ Stage: Stage-14
Map Reduce
Map Operator Tree:
TableScan
diff --git a/ql/src/test/results/clientpositive/join_thrift.q.out b/ql/src/test/results/clientpositive/join_thrift.q.out
index f0751e2..58a2f88 100644
--- a/ql/src/test/results/clientpositive/join_thrift.q.out
+++ b/ql/src/test/results/clientpositive/join_thrift.q.out
@@ -39,7 +39,7 @@ STAGE PLANS:
Map Reduce
Map Operator Tree:
TableScan
- alias: s2
+ alias: s1
Statistics: Num rows: 11 Data size: 3070 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: aint is not null (type: boolean)
@@ -49,9 +49,8 @@ STAGE PLANS:
sort order: +
Map-reduce partition columns: aint (type: int)
Statistics: Num rows: 6 Data size: 1674 Basic stats: COMPLETE Column stats: NONE
- value expressions: lintstring (type: array<struct<myint:int,mystring:string,underscore_int:int>>)
TableScan
- alias: s1
+ alias: s2
Statistics: Num rows: 11 Data size: 3070 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: aint is not null (type: boolean)
@@ -61,6 +60,7 @@ STAGE PLANS:
sort order: +
Map-reduce partition columns: aint (type: int)
Statistics: Num rows: 6 Data size: 1674 Basic stats: COMPLETE Column stats: NONE
+ value expressions: lintstring (type: array<struct<myint:int,mystring:string,underscore_int:int>>)
Reduce Operator Tree:
Join Operator
condition map:
diff --git a/ql/src/test/results/clientpositive/join_vc.q.out b/ql/src/test/results/clientpositive/join_vc.q.out
index fe519e8..a4452b2 100644
--- a/ql/src/test/results/clientpositive/join_vc.q.out
+++ b/ql/src/test/results/clientpositive/join_vc.q.out
@@ -11,38 +11,38 @@ POSTHOOK: query: -- see HIVE-4033 earlier a flag named hasVC was not initialized
explain select t3.BLOCK__OFFSET__INSIDE__FILE,t3.key,t3.value from src t1 join src t2 on t1.key = t2.key join src t3 on t2.value = t3.value order by t3.BLOCK__OFFSET__INSIDE__FILE,t3.key,t3.value limit 3
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-3 is a root stage
- Stage-1 depends on stages: Stage-3
+ Stage-1 is a root stage
Stage-2 depends on stages: Stage-1
- Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-3
STAGE PLANS:
- Stage: Stage-3
+ Stage: Stage-1
Map Reduce
Map Operator Tree:
TableScan
- alias: t2
+ alias: t1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (key is not null and value is not null) (type: boolean)
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: key (type: string)
sort order: +
Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
- value expressions: value (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
TableScan
- alias: t1
+ alias: t2
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ predicate: (key is not null and value is not null) (type: boolean)
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: key (type: string)
sort order: +
Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ value expressions: value (type: string)
Reduce Operator Tree:
Join Operator
condition map:
@@ -59,10 +59,16 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
- Stage: Stage-1
+ Stage: Stage-2
Map Reduce
Map Operator Tree:
TableScan
+ Reduce Output Operator
+ key expressions: _col6 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col6 (type: string)
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ TableScan
alias: t3
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
@@ -74,12 +80,6 @@ STAGE PLANS:
Map-reduce partition columns: value (type: string)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
value expressions: key (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint)
- TableScan
- Reduce Output Operator
- key expressions: _col6 (type: string)
- sort order: +
- Map-reduce partition columns: _col6 (type: string)
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
Reduce Operator Tree:
Join Operator
condition map:
@@ -100,7 +100,7 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
- Stage: Stage-2
+ Stage: Stage-3
Map Reduce
Map Operator Tree:
TableScan
@@ -158,7 +158,7 @@ STAGE PLANS:
Map Reduce
Map Operator Tree:
TableScan
- alias: t2
+ alias: t1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (key < 100) (type: boolean)
@@ -168,9 +168,8 @@ STAGE PLANS:
sort order: +
Map-reduce partition columns: key (type: string)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- value expressions: BLOCK__OFFSET__INSIDE__FILE (type: bigint)
TableScan
- alias: t1
+ alias: t2
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (key < 100) (type: boolean)
@@ -180,6 +179,7 @@ STAGE PLANS:
sort order: +
Map-reduce partition columns: key (type: string)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ value expressions: BLOCK__OFFSET__INSIDE__FILE (type: bigint)
Reduce Operator Tree:
Join Operator
condition map:
diff --git a/ql/src/test/results/clientpositive/lateral_view.q.out b/ql/src/test/results/clientpositive/lateral_view.q.out
index 25ed62f..ce20041 100644
--- a/ql/src/test/results/clientpositive/lateral_view.q.out
+++ b/ql/src/test/results/clientpositive/lateral_view.q.out
@@ -132,14 +132,14 @@ STAGE PLANS:
Lateral View Forward
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- Statistics: Num rows: 500 Data size: 134000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
Lateral View Join Operator
outputColumnNames: _col5
- Statistics: Num rows: 1000 Data size: 162000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col5 (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 1000 Data size: 162000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 3
Statistics: Num rows: 3 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
@@ -159,11 +159,11 @@ STAGE PLANS:
function name: explode
Lateral View Join Operator
outputColumnNames: _col5
- Statistics: Num rows: 1000 Data size: 162000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col5 (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 1000 Data size: 162000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 3
Statistics: Num rows: 3 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
@@ -199,19 +199,19 @@ STAGE PLANS:
Lateral View Forward
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- Statistics: Num rows: 500 Data size: 134000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
Lateral View Join Operator
outputColumnNames: _col5
- Statistics: Num rows: 1000 Data size: 162000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE
Lateral View Forward
- Statistics: Num rows: 1000 Data size: 162000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col5 (type: int)
outputColumnNames: _col5
- Statistics: Num rows: 1000 Data size: 268000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
Lateral View Join Operator
outputColumnNames: _col5, _col6
- Statistics: Num rows: 2000 Data size: 268000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2000 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
Select Operator
expressions: _col5 (type: int), _col6 (type: string)
outputColumnNames: _col0, _col1
@@ -235,7 +235,7 @@ STAGE PLANS:
function name: explode
Lateral View Join Operator
outputColumnNames: _col5, _col6
- Statistics: Num rows: 2000 Data size: 268000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2000 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
Select Operator
expressions: _col5 (type: int), _col6 (type: string)
outputColumnNames: _col0, _col1
@@ -259,16 +259,16 @@ STAGE PLANS:
function name: explode
Lateral View Join Operator
outputColumnNames: _col5
- Statistics: Num rows: 1000 Data size: 162000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE
Lateral View Forward
- Statistics: Num rows: 1000 Data size: 162000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col5 (type: int)
outputColumnNames: _col5
- Statistics: Num rows: 1000 Data size: 268000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
Lateral View Join Operator
outputColumnNames: _col5, _col6
- Statistics: Num rows: 2000 Data size: 268000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2000 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
Select Operator
expressions: _col5 (type: int), _col6 (type: string)
outputColumnNames: _col0, _col1
@@ -292,7 +292,7 @@ STAGE PLANS:
function name: explode
Lateral View Join Operator
outputColumnNames: _col5, _col6
- Statistics: Num rows: 2000 Data size: 268000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2000 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
Select Operator
expressions: _col5 (type: int), _col6 (type: string)
outputColumnNames: _col0, _col1
@@ -332,21 +332,21 @@ STAGE PLANS:
Lateral View Forward
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- Statistics: Num rows: 500 Data size: 134000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
Lateral View Join Operator
outputColumnNames: _col5
- Statistics: Num rows: 1000 Data size: 158000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 24000 Basic stats: COMPLETE Column stats: COMPLETE
Lateral View Forward
- Statistics: Num rows: 1000 Data size: 158000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 24000 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- Statistics: Num rows: 1000 Data size: 268000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
Lateral View Join Operator
outputColumnNames: _col6
- Statistics: Num rows: 2000 Data size: 268000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2000 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
Select Operator
expressions: _col6 (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 2000 Data size: 268000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2000 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
Limit
Number of rows: 3
Statistics: Num rows: 3 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
@@ -366,11 +366,11 @@ STAGE PLANS:
function name: explode
Lateral View Join Operator
outputColumnNames: _col6
- Statistics: Num rows: 2000 Data size: 268000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2000 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
Select Operator
expressions: _col6 (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 2000 Data size: 268000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2000 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
Limit
Number of rows: 3
Statistics: Num rows: 3 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
@@ -390,18 +390,18 @@ STAGE PLANS:
function name: explode
Lateral View Join Operator
outputColumnNames: _col5
- Statistics: Num rows: 1000 Data size: 158000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 24000 Basic stats: COMPLETE Column stats: COMPLETE
Lateral View Forward
- Statistics: Num rows: 1000 Data size: 158000 Basic stats: COMPLETE
Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 24000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 1000 Data size: 268000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Lateral View Join Operator outputColumnNames: _col6 - Statistics: Num rows: 2000 Data size: 268000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Select Operator expressions: _col6 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 2000 Data size: 268000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Limit Number of rows: 3 Statistics: Num rows: 3 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE @@ -421,11 +421,11 @@ STAGE PLANS: function name: explode Lateral View Join Operator outputColumnNames: _col6 - Statistics: Num rows: 2000 Data size: 268000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Select Operator expressions: _col6 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 2000 Data size: 268000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Limit Number of rows: 3 Statistics: Num rows: 3 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE @@ -519,10 +519,10 @@ STAGE PLANS: Lateral View Forward Statistics: Num rows: 500 Data size: 1406 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 500 Data size: 134000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Lateral View Join Operator outputColumnNames: _col4 - Statistics: Num rows: 1000 Data size: 162000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col4 (type: int) outputColumnNames: _col0 @@ -546,7 +546,7 @@ STAGE PLANS: function name: explode Lateral View Join Operator outputColumnNames: _col4 - Statistics: Num rows: 1000 Data size: 162000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col4 (type: int) outputColumnNames: _col0 diff --git a/ql/src/test/results/clientpositive/lateral_view_cp.q.out b/ql/src/test/results/clientpositive/lateral_view_cp.q.out index 514bfad..dad7b0e 100644 --- a/ql/src/test/results/clientpositive/lateral_view_cp.q.out +++ b/ql/src/test/results/clientpositive/lateral_view_cp.q.out @@ -80,8 +80,25 @@ STAGE PLANS: Lateral View Join Operator outputColumnNames: _col2 Statistics: Num rows: 550 Data size: 5842 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string) + Group By Operator + aggregations: count(_col2) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Select Operator + expressions: _col1 (type: array<string>) + outputColumnNames: _col0 
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + UDTF Operator + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + function name: explode + Lateral View Join Operator outputColumnNames: _col2 Statistics: Num rows: 550 Data size: 5842 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -95,31 +112,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Select Operator - expressions: _col1 (type: array<string>) - outputColumnNames: _col0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - UDTF Operator - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - function name: explode - Lateral View Join Operator - outputColumnNames: _col2 - Statistics: Num rows: 550 Data size: 5842 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string) - outputColumnNames: _col2 - Statistics: Num rows: 550 Data size: 5842 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(_col2) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce diff --git a/ql/src/test/results/clientpositive/lateral_view_noalias.q.out b/ql/src/test/results/clientpositive/lateral_view_noalias.q.out index 51a29d0..e1445bf 100644 --- a/ql/src/test/results/clientpositive/lateral_view_noalias.q.out +++ b/ql/src/test/results/clientpositive/lateral_view_noalias.q.out @@ -18,14 +18,14 @@ STAGE PLANS: Lateral View Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 500 Data size: 134000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Lateral View Join Operator outputColumnNames: _col5, _col6 - Statistics: Num rows: 1000 Data size: 326000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col5 (type: string), _col6 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 326000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 2 Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE @@ -45,11 +45,11 @@ STAGE PLANS: function name: explode Lateral View Join Operator outputColumnNames: _col5, _col6 - Statistics: Num rows: 1000 Data size: 326000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col5 (type: string), _col6 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 326000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 2 Statistics: Num rows: 2 
Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE @@ -158,10 +158,10 @@ STAGE PLANS: Lateral View Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 500 Data size: 134000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Lateral View Join Operator outputColumnNames: _col5, _col6 - Statistics: Num rows: 1000 Data size: 326000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col5 (type: string), _col6 (type: int) outputColumnNames: _col0, _col1 @@ -182,7 +182,7 @@ STAGE PLANS: function name: explode Lateral View Join Operator outputColumnNames: _col5, _col6 - Statistics: Num rows: 1000 Data size: 326000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col5 (type: string), _col6 (type: int) outputColumnNames: _col0, _col1 @@ -259,10 +259,10 @@ STAGE PLANS: Lateral View Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 500 Data size: 134000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Lateral View Join Operator outputColumnNames: _col5, _col6 - Statistics: Num rows: 1000 Data size: 326000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col5 (type: string), _col6 (type: int) outputColumnNames: _col0, _col1 @@ -283,7 +283,7 @@ STAGE PLANS: function name: explode Lateral View Join Operator outputColumnNames: _col5, _col6 - Statistics: Num rows: 1000 Data size: 326000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col5 (type: string), _col6 (type: int) outputColumnNames: _col0, _col1 diff --git a/ql/src/test/results/clientpositive/limit_pushdown.q.out b/ql/src/test/results/clientpositive/limit_pushdown.q.out index 339297c..ed6b7aa 100644 --- a/ql/src/test/results/clientpositive/limit_pushdown.q.out +++ b/ql/src/test/results/clientpositive/limit_pushdown.q.out @@ -679,17 +679,13 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -698,14 +694,14 @@ STAGE PLANS: 
Reduce Output Operator key expressions: _col1 (type: double) sort order: + - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 value expressions: _col0 (type: string) Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: double) outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 20 Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE @@ -805,22 +801,18 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 2 - Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Limit + Number of rows: 2 + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -893,19 +885,15 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 3 - Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Limit + Number of rows: 3 + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-4 Map Reduce diff --git a/ql/src/test/results/clientpositive/limit_pushdown_negative.q.out b/ql/src/test/results/clientpositive/limit_pushdown_negative.q.out index b27e049..0691339 100644 --- a/ql/src/test/results/clientpositive/limit_pushdown_negative.q.out +++ 
b/ql/src/test/results/clientpositive/limit_pushdown_negative.q.out @@ -13,7 +13,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -25,7 +25,7 @@ STAGE PLANS: Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -293,19 +293,15 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 20 - Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator diff --git a/ql/src/test/results/clientpositive/lineage1.q.out b/ql/src/test/results/clientpositive/lineage1.q.out index 17d8ff1..4bead1a 100644 --- a/ql/src/test/results/clientpositive/lineage1.q.out +++ b/ql/src/test/results/clientpositive/lineage1.q.out @@ -34,7 +34,7 @@ FROM (SELECT t1.key, p1.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-9 + Stage-2 depends on stages: Stage-1, Stage-10 Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 @@ -42,7 +42,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 - Stage-9 is a root stage + Stage-10 is a root stage STAGE PLANS: Stage: Stage-1 @@ -171,26 +171,26 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-9 + Stage: Stage-10 Map Reduce Map Operator Tree: TableScan - alias: p2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: t2 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE TableScan - alias: t2 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: p2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - 
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -242,4 +242,4 @@ POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 POSTHOOK: Output: default@dest_l1 POSTHOOK: Lineage: dest_l1.key EXPRESSION [(src1)t1.FieldSchema(name:key, type:string, comment:default), (src1)t2.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest_l1.value EXPRESSION [(src)p2.FieldSchema(name:value, type:string, comment:default), (src)p1.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_l1.value EXPRESSION [(src)p1.FieldSchema(name:value, type:string, comment:default), (src)p2.FieldSchema(name:value, type:string, comment:default), ] diff --git a/ql/src/test/results/clientpositive/load_dyn_part13.q.out b/ql/src/test/results/clientpositive/load_dyn_part13.q.out index a7dfd13..1bdfd04 100644 --- a/ql/src/test/results/clientpositive/load_dyn_part13.q.out +++ b/ql/src/test/results/clientpositive/load_dyn_part13.q.out @@ -69,50 +69,42 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > 20) and (key < 40)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + predicate: (key < 20) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string), '33' (type: string) + expressions: key (type: string), value (type: string), '22' (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Union Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part13 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.nzhang_part13 TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key < 20) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + predicate: ((key > 20) and (key < 40)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string), '22' (type: string) + expressions: key (type: string), value (type: string), '33' (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 166 Data size: 1763 Basic 
stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Union Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part13 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.nzhang_part13 Stage: Stage-0 Move Operator diff --git a/ql/src/test/results/clientpositive/load_dyn_part14.q.out b/ql/src/test/results/clientpositive/load_dyn_part14.q.out index 118d198..42eec85 100644 --- a/ql/src/test/results/clientpositive/load_dyn_part14.q.out +++ b/ql/src/test/results/clientpositive/load_dyn_part14.q.out @@ -74,7 +74,7 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: 'k2' (type: string), '' (type: string) + expressions: 'k1' (type: string), UDFToString(null) (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 85000 Basic stats: COMPLETE Column stats: COMPLETE Limit @@ -105,48 +105,36 @@ STAGE PLANS: TableScan Union Statistics: Num rows: 6 Data size: 1022 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 1026 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 1026 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part14 + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 1022 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.nzhang_part14 TableScan Union Statistics: Num rows: 6 Data size: 1022 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 1026 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 1026 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part14 + File Output Operator + compressed: false + Statistics: Num 
rows: 6 Data size: 1022 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.nzhang_part14 TableScan Union Statistics: Num rows: 6 Data size: 1022 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 1026 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 1026 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part14 + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 1022 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.nzhang_part14 Stage: Stage-8 Conditional Operator @@ -209,24 +197,24 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: 'k3' (type: string), ' ' (type: string) + expressions: 'k2' (type: string), '' (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 85500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 85000 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 2 - Statistics: Num rows: 2 Data size: 342 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 2 Data size: 342 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 342 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 2 - Statistics: Num rows: 2 Data size: 342 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -241,24 +229,24 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: 'k1' (type: string), UDFToString(null) (type: string) + expressions: 'k3' (type: string), ' ' (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 85000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 85500 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 2 - Statistics: Num rows: 2 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 342 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort 
order: - Statistics: Num rows: 2 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 342 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 342 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 2 - Statistics: Num rows: 2 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 342 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: diff --git a/ql/src/test/results/clientpositive/louter_join_ppr.q.out b/ql/src/test/results/clientpositive/louter_join_ppr.q.out index 7db4457..2c7aab6 100644 --- a/ql/src/test/results/clientpositive/louter_join_ppr.q.out +++ b/ql/src/test/results/clientpositive/louter_join_ppr.q.out @@ -113,35 +113,35 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: ((key > 10) and (key < 20)) (type: boolean) - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - tag: 1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + tag: 0 value expressions: value (type: string) auto parallelism: false TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: ((key > 10) and (key < 20)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - tag: 0 + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + tag: 1 value expressions: value (type: string) auto parallelism: false Path -> Alias: @@ -482,36 +482,36 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: ((key > 10) and (key < 20)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: 
Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string) + Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string), ds (type: string) auto parallelism: false TableScan - alias: a - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: ((key > 10) and (key < 20)) (type: boolean) - Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string), ds (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -954,36 +954,36 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: ((key > 10) and (key < 20)) (type: boolean) - Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string), ds (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) auto parallelism: false TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: ((key > 10) and (key < 20)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) + Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string), ds (type: string) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -1421,35 +1421,35 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false 
predicate: ((key > 10) and (key < 20)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - tag: 1 + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + tag: 0 value expressions: value (type: string) auto parallelism: false TableScan - alias: a - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: ((key > 10) and (key < 20)) (type: boolean) - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - tag: 0 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + tag: 1 value expressions: value (type: string) auto parallelism: false Path -> Alias: diff --git a/ql/src/test/results/clientpositive/mapjoin_distinct.q.out b/ql/src/test/results/clientpositive/mapjoin_distinct.q.out index d0fca5f..4e50aa0 100644 --- a/ql/src/test/results/clientpositive/mapjoin_distinct.q.out +++ b/ql/src/test/results/clientpositive/mapjoin_distinct.q.out @@ -13,8 +13,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-4 is a root stage Stage-1 depends on stages: Stage-4 - Stage-3 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-3 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-4 @@ -59,20 +59,16 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col1 + Group By Operator + keys: _col1 (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -88,7 +84,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -209,20 +205,16 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col1 + Group By Operator 
+ keys: _col1 (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -292,8 +284,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-4 is a root stage Stage-1 depends on stages: Stage-4 - Stage-3 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-3 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-4 @@ -338,15 +330,11 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col1 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -362,7 +350,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -483,15 +471,11 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col1 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/mapjoin_hook.q.out b/ql/src/test/results/clientpositive/mapjoin_hook.q.out index 89632f2..8a6743c 100644 --- a/ql/src/test/results/clientpositive/mapjoin_hook.q.out +++ b/ql/src/test/results/clientpositive/mapjoin_hook.q.out @@ -16,7 +16,7 @@ PREHOOK: Output: default@dest1 RUN: Stage-4:MAPREDLOCAL RUN: Stage-1:MAPRED RUN: Stage-0:MOVE -RUN: Stage-2:STATS +RUN: Stage-3:STATS PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key = src3.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src3.value PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out b/ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out index 4c2d8a5..49c039e 100644 --- 
a/ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out @@ -659,22 +659,18 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col2 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string) - outputColumnNames: _col2 + Group By Operator + aggregations: count() + keys: _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col2 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/mapjoin_subquery.q.out b/ql/src/test/results/clientpositive/mapjoin_subquery.q.out index 4084764..6f5f643 100644 --- a/ql/src/test/results/clientpositive/mapjoin_subquery.q.out +++ b/ql/src/test/results/clientpositive/mapjoin_subquery.q.out @@ -83,32 +83,28 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} - 1 {value} - keys: - 0 _col0 (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col5 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 {value} + keys: + 0 _col0 (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col5 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col5 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col5 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work @@ -326,32 +322,28 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: 
_col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} - 1 {value} - keys: - 0 _col0 (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col5 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 {value} + keys: + 0 _col0 (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col5 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col5 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col5 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work diff --git a/ql/src/test/results/clientpositive/merge4.q.out b/ql/src/test/results/clientpositive/merge4.q.out index f86c21c..121b724 100644 --- a/ql/src/test/results/clientpositive/merge4.q.out +++ b/ql/src/test/results/clientpositive/merge4.q.out @@ -2817,25 +2817,6 @@ STAGE PLANS: Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Union Statistics: Num rows: 1001 Data size: 10883 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1001 Data size: 10883 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1001 Data size: 10883 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part - TableScan - Union - Statistics: Num rows: 1001 Data size: 10883 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1001 Data size: 10883 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false Statistics: Num rows: 1001 Data size: 10883 Basic stats: COMPLETE Column stats: COMPLETE @@ -2844,6 +2825,17 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part + TableScan + Union + Statistics: Num rows: 1001 Data size: 10883 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1001 Data size: 10883 Basic stats: COMPLETE 
Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.nzhang_part Stage: Stage-8 Conditional Operator diff --git a/ql/src/test/results/clientpositive/merge_join_1.q.out b/ql/src/test/results/clientpositive/merge_join_1.q.out new file mode 100644 index 0000000..eddd3a7 --- /dev/null +++ b/ql/src/test/results/clientpositive/merge_join_1.q.out @@ -0,0 +1,192 @@ +PREHOOK: query: drop table if exists test_join_1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists test_join_1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table if exists test_join_2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists test_join_2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table test_join_1(a string, b string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_join_1 +POSTHOOK: query: create table test_join_1(a string, b string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_join_1 +PREHOOK: query: create table test_join_2(a string, b string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_join_2 +POSTHOOK: query: create table test_join_2(a string, b string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_join_2 +PREHOOK: query: explain +select * from +( + SELECT a a, b b + FROM test_join_1 +)t1 + +join + +( + SELECT a a, b b + FROM test_join_1 +)t2 + on t1.a = t2.a + and t1.a = t2.b + +join + +( + select a from test_join_2 +)t3 on t1.a = t3.a +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * from +( + SELECT a a, b b + FROM test_join_1 +)t1 + +join + +( + SELECT a a, b b + FROM test_join_1 +)t2 + on t1.a = t2.a + and t1.a = t2.b + +join + +( + select a from test_join_2 +)t3 on t1.a = t3.a +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test_join_1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: a is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col0 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: string) + TableScan + alias: test_join_1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: (a is not null and b is not null) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: a (type: string), b (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 
(type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) + TableScan + alias: test_join_2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: a is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: a (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: drop table test_join_1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@test_join_1 +PREHOOK: Output: default@test_join_1 +POSTHOOK: query: drop table test_join_1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@test_join_1 +POSTHOOK: Output: default@test_join_1 +PREHOOK: query: drop table test_join_2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@test_join_2 +PREHOOK: Output: default@test_join_2 +POSTHOOK: query: drop table test_join_2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@test_join_2 +POSTHOOK: Output: default@test_join_2 diff --git a/ql/src/test/results/clientpositive/mergejoins.q.out b/ql/src/test/results/clientpositive/mergejoins.q.out index d5f5bb4..af3e404 100644 --- a/ql/src/test/results/clientpositive/mergejoins.q.out +++ b/ql/src/test/results/clientpositive/mergejoins.q.out @@ -52,10 +52,10 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: d + 
alias: a Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: val1 is not null (type: boolean) + predicate: (val1 is not null and val2 is not null) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: val1 (type: int) @@ -88,10 +88,10 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val2 (type: int) TableScan - alias: a + alias: d Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: (val1 is not null and val2 is not null) (type: boolean) + predicate: val1 is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: val1 (type: int) @@ -183,7 +183,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -192,7 +192,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) TableScan - alias: c + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -201,7 +201,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) TableScan - alias: a + alias: c Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) diff --git a/ql/src/test/results/clientpositive/mergejoins_mixed.q.out b/ql/src/test/results/clientpositive/mergejoins_mixed.q.out index 045fc7a..d525a91 100644 --- a/ql/src/test/results/clientpositive/mergejoins_mixed.q.out +++ b/ql/src/test/results/clientpositive/mergejoins_mixed.q.out @@ -27,7 +27,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: d + alias: a Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -54,7 +54,7 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: value (type: string) TableScan - alias: a + alias: d Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -108,7 +108,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: d + alias: a Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -135,7 +135,7 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: value (type: string) TableScan - alias: a + alias: d Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -189,7 +189,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: d + alias: a Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -216,7 +216,7 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: value (type: string) TableScan - alias: a + alias: d Statistics: Num rows: 0 Data 
size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -270,7 +270,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: d + alias: a Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -297,7 +297,7 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: value (type: string) TableScan - alias: a + alias: d Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -354,7 +354,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: d + alias: a Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -372,7 +372,7 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: value (type: string) TableScan - alias: a + alias: d Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -460,7 +460,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: d + alias: a Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -478,7 +478,7 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: value (type: string) TableScan - alias: a + alias: d Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -566,7 +566,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: d + alias: a Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -584,7 +584,7 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: value (type: string) TableScan - alias: a + alias: d Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -665,17 +665,17 @@ explain select * from a join a b on (a.key=b.key) left outer join a c on (b.value=c.key) right outer join a d on (a.key=d.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 Stage-3 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-3 - Stage-0 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -687,7 +687,7 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: value (type: string) TableScan - alias: a + alias: b Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -714,7 +714,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -749,10 +749,17 @@ STAGE PLANS: output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + TableScan alias: d Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator @@ -761,13 +768,6 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: value (type: string) - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -802,17 +802,17 @@ POSTHOOK: query: explain select * from a join a b on (a.key=b.key) left outer join a c on (b.value=c.key) full outer join a d on (a.key=d.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 Stage-3 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-3 - Stage-0 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -824,7 +824,7 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: value (type: string) TableScan - alias: a + alias: b Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -851,7 +851,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -886,10 +886,17 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + TableScan alias: d Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator @@ -898,13 +905,6 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: value (type: string) - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - 
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -939,17 +939,17 @@ POSTHOOK: query: explain select * from a join a b on (a.key=b.key) right outer join a c on (b.value=c.key) left outer join a d on (a.key=d.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 Stage-3 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-3 - Stage-0 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -961,7 +961,7 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: value (type: string) TableScan - alias: a + alias: b Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -988,7 +988,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -1023,10 +1023,17 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + TableScan alias: d Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator @@ -1035,13 +1042,6 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: value (type: string) - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1076,17 +1076,17 @@ POSTHOOK: query: explain select * from a join a b on (a.key=b.key) right outer join a c on (b.value=c.key) full outer join a d on (a.key=d.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 Stage-3 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-3 - Stage-0 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -1098,7 +1098,7 @@ STAGE PLANS: Statistics: Num rows: 
0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: value (type: string) TableScan - alias: a + alias: b Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -1125,7 +1125,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -1160,10 +1160,17 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + TableScan alias: d Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator @@ -1172,13 +1179,6 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: value (type: string) - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1215,16 +1215,16 @@ explain select * from a join a b on (a.key=b.key) left outer join a c on (b.value=c.key) left outer join a d on (c.key=d.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -1236,7 +1236,7 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: value (type: string) TableScan - alias: a + alias: b Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -1263,18 +1263,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan - alias: d - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: key (type: string) + key expressions: _col6 (type: string) sort order: + - Map-reduce partition columns: key (type: string) + Map-reduce partition columns: _col6 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: value (type: string) + value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) TableScan alias: c Statistics: Num 
rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE @@ -1285,12 +1283,14 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: value (type: string) TableScan + alias: d + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: _col6 (type: string) + key expressions: key (type: string) sort order: + - Map-reduce partition columns: _col6 (type: string) + Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) + value expressions: value (type: string) Reduce Operator Tree: Join Operator condition map: diff --git a/ql/src/test/results/clientpositive/metadataonly1.q.out b/ql/src/test/results/clientpositive/metadataonly1.q.out index a7dc080..f20b256 100644 --- a/ql/src/test/results/clientpositive/metadataonly1.q.out +++ b/ql/src/test/results/clientpositive/metadataonly1.q.out @@ -713,27 +713,23 @@ STAGE PLANS: isSamplingPred: false predicate: _col0 is not null (type: boolean) Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0 - columns.types string - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Map Reduce @@ -874,30 +870,28 @@ STAGE PLANS: 0 1 Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0 - columns.types bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 
1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-3 Map Reduce diff --git a/ql/src/test/results/clientpositive/multiMapJoin1.q.out b/ql/src/test/results/clientpositive/multiMapJoin1.q.out index 07be9d8..b4a84c8 100644 --- a/ql/src/test/results/clientpositive/multiMapJoin1.q.out +++ b/ql/src/test/results/clientpositive/multiMapJoin1.q.out @@ -181,12 +181,12 @@ JOIN smallTbl2 on (firstjoin.value1 = smallTbl2.value) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-7 is a root stage - Stage-2 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-2 + Stage-8 is a root stage + Stage-3 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-7 + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: firstjoin:smalltbl1 @@ -225,7 +225,7 @@ STAGE PLANS: 0 _col1 (type: string) 1 value (type: string) - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -245,31 +245,25 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col1 Statistics: Num rows: 1375 Data size: 14608 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 1375 Data size: 14608 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - keys: - 0 _col1 (type: string) - 1 value (type: string) - Statistics: Num rows: 1512 Data size: 16068 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1512 Data size: 16068 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 _col1 (type: string) + 1 value (type: string) + Statistics: Num rows: 1512 Data size: 16068 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -320,8 +314,8 @@ POSTHOOK: Input: default@bigtbl POSTHOOK: Input: default@smalltbl1 POSTHOOK: Input: default@smalltbl2 #### A masked pattern was here #### -RUN: Stage-7:MAPREDLOCAL -RUN: Stage-2:MAPRED +RUN: Stage-8:MAPREDLOCAL +RUN: Stage-3:MAPRED 580 PREHOOK: query: -- Now run a query with two-way join, which should be converted into a -- map-join followed by groupby - two MR jobs overall @@ -346,12 +340,12 @@ JOIN smallTbl2 on 
(firstjoin.value1 = smallTbl2.value) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-7 is a root stage - Stage-2 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-2 + Stage-8 is a root stage + Stage-3 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-7 + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: firstjoin:smalltbl1 @@ -390,7 +384,7 @@ STAGE PLANS: 0 _col1 (type: string) 1 value (type: string) - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -410,31 +404,25 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col1 Statistics: Num rows: 1375 Data size: 14608 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 1375 Data size: 14608 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - keys: - 0 _col1 (type: string) - 1 value (type: string) - Statistics: Num rows: 1512 Data size: 16068 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1512 Data size: 16068 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 _col1 (type: string) + 1 value (type: string) + Statistics: Num rows: 1512 Data size: 16068 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -485,8 +473,8 @@ POSTHOOK: Input: default@bigtbl POSTHOOK: Input: default@smalltbl1 POSTHOOK: Input: default@smalltbl2 #### A masked pattern was here #### -RUN: Stage-7:MAPREDLOCAL -RUN: Stage-2:MAPRED +RUN: Stage-8:MAPREDLOCAL +RUN: Stage-3:MAPRED 580 PREHOOK: query: -- Now run a query with two-way join, which should first be converted into a -- map-join followed by groupby and then finally into a single MR job. 
@@ -515,12 +503,12 @@ smallTbl2 on (firstjoin.value1 = smallTbl2.value) group by smallTbl2.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-7 is a root stage - Stage-2 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-2 + Stage-8 is a root stage + Stage-3 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-7 + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: firstjoin:smalltbl1 @@ -559,7 +547,7 @@ STAGE PLANS: 0 _col1 (type: string) 1 value (type: string) - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -579,37 +567,29 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col1 Statistics: Num rows: 1375 Data size: 14608 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 1375 Data size: 14608 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 {key} - keys: - 0 _col1 (type: string) - 1 value (type: string) - outputColumnNames: _col3 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 {key} + keys: + 0 _col1 (type: string) + 1 value (type: string) + outputColumnNames: _col3 + Statistics: Num rows: 1512 Data size: 16068 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col3 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 1512 Data size: 16068 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col3 (type: string) - outputColumnNames: _col3 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1512 Data size: 16068 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col3 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1512 Data size: 16068 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1512 Data size: 16068 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -663,8 +643,8 @@ POSTHOOK: Input: default@bigtbl POSTHOOK: Input: default@smalltbl1 POSTHOOK: Input: default@smalltbl2 #### A masked pattern was here #### -RUN: Stage-7:MAPREDLOCAL -RUN: Stage-2:MAPRED +RUN: Stage-8:MAPREDLOCAL +RUN: Stage-3:MAPRED 270 10 10 @@ -818,38 +798,38 @@ FROM (SELECT join2.key1 as key1, JOIN smallTbl4 ON (join3.key3 = smallTbl4.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-20 is a root stage , consists of Stage-27, Stage-28, Stage-5 - Stage-27 has a backup stage: Stage-5 - Stage-18 depends on stages: Stage-27 - Stage-17 depends on stages: Stage-5, Stage-18, Stage-19 , consists of Stage-25, Stage-26, Stage-1 - Stage-25 has a backup stage: Stage-1 - Stage-15 depends on stages: Stage-25 - Stage-14 depends on stages: Stage-1, Stage-15, Stage-16 , consists of Stage-23, Stage-24, Stage-2 - Stage-23 has a backup stage: Stage-2 - Stage-12 depends on stages: Stage-23 - Stage-11 depends on stages: Stage-2, Stage-12, Stage-13 , consists of Stage-21, Stage-22, Stage-3 - Stage-21 has a backup stage: Stage-3 - Stage-9 depends on stages: Stage-21 - 
Stage-4 depends on stages: Stage-3, Stage-9, Stage-10 - Stage-22 has a backup stage: Stage-3 + Stage-21 is a root stage , consists of Stage-28, Stage-29, Stage-1 + Stage-28 has a backup stage: Stage-1 + Stage-19 depends on stages: Stage-28 + Stage-18 depends on stages: Stage-1, Stage-19, Stage-20 , consists of Stage-26, Stage-27, Stage-2 + Stage-26 has a backup stage: Stage-2 + Stage-16 depends on stages: Stage-26 + Stage-15 depends on stages: Stage-2, Stage-16, Stage-17 , consists of Stage-24, Stage-25, Stage-3 + Stage-24 has a backup stage: Stage-3 + Stage-13 depends on stages: Stage-24 + Stage-12 depends on stages: Stage-3, Stage-13, Stage-14 , consists of Stage-22, Stage-23, Stage-4 + Stage-22 has a backup stage: Stage-4 Stage-10 depends on stages: Stage-22 + Stage-5 depends on stages: Stage-4, Stage-10, Stage-11 + Stage-23 has a backup stage: Stage-4 + Stage-11 depends on stages: Stage-23 + Stage-4 + Stage-25 has a backup stage: Stage-3 + Stage-14 depends on stages: Stage-25 Stage-3 - Stage-24 has a backup stage: Stage-2 - Stage-13 depends on stages: Stage-24 + Stage-27 has a backup stage: Stage-2 + Stage-17 depends on stages: Stage-27 Stage-2 - Stage-26 has a backup stage: Stage-1 - Stage-16 depends on stages: Stage-26 + Stage-29 has a backup stage: Stage-1 + Stage-20 depends on stages: Stage-29 Stage-1 - Stage-28 has a backup stage: Stage-5 - Stage-19 depends on stages: Stage-28 - Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-20 + Stage: Stage-21 Conditional Operator - Stage: Stage-27 + Stage: Stage-28 Map Reduce Local Work Alias -> Map Local Tables: join3:join2:join1:smalltbl1 @@ -869,7 +849,7 @@ STAGE PLANS: 0 key1 (type: string) 1 key (type: string) - Stage: Stage-18 + Stage: Stage-19 Map Reduce Map Operator Tree: TableScan @@ -898,10 +878,10 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-17 + Stage: Stage-18 Conditional Operator - Stage: Stage-25 + Stage: Stage-26 Map Reduce Local Work Alias -> Map Local Tables: join3:join2:smalltbl2 @@ -921,7 +901,7 @@ STAGE PLANS: 0 _col3 (type: string) 1 value (type: string) - Stage: Stage-15 + Stage: Stage-16 Map Reduce Map Operator Tree: TableScan @@ -947,10 +927,10 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-14 + Stage: Stage-15 Conditional Operator - Stage: Stage-23 + Stage: Stage-24 Map Reduce Local Work Alias -> Map Local Tables: join3:smalltbl3 @@ -970,7 +950,7 @@ STAGE PLANS: 0 _col1 (type: string) 1 key (type: string) - Stage: Stage-12 + Stage: Stage-13 Map Reduce Map Operator Tree: TableScan @@ -996,10 +976,10 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-11 + Stage: Stage-12 Conditional Operator - Stage: Stage-21 + Stage: Stage-22 Map Reduce Local Work Alias -> Map Local Tables: smalltbl4 @@ -1019,7 +999,7 @@ STAGE PLANS: 0 _col2 (type: string) 1 key (type: string) - Stage: Stage-9 + Stage: Stage-10 Map Reduce Map Operator Tree: TableScan @@ -1049,7 +1029,7 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -1075,7 +1055,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-22 + Stage: Stage-23 Map Reduce Local Work Alias -> Map Local Tables: $INTNAME @@ -1092,7 +1072,7 @@ STAGE PLANS: 0 _col2 (type: string) 1 key (type: string) - Stage: Stage-10 + Stage: Stage-11 Map Reduce Map Operator Tree: TableScan @@ -1125,7 +1105,7 @@ 
STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-3 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -1171,7 +1151,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-24 + Stage: Stage-25 Map Reduce Local Work Alias -> Map Local Tables: join3:$INTNAME @@ -1188,7 +1168,7 @@ STAGE PLANS: 0 _col1 (type: string) 1 key (type: string) - Stage: Stage-13 + Stage: Stage-14 Map Reduce Map Operator Tree: TableScan @@ -1217,7 +1197,7 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -1258,7 +1238,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-26 + Stage: Stage-27 Map Reduce Local Work Alias -> Map Local Tables: join3:join2:$INTNAME @@ -1275,7 +1255,7 @@ STAGE PLANS: 0 _col3 (type: string) 1 value (type: string) - Stage: Stage-16 + Stage: Stage-17 Map Reduce Map Operator Tree: TableScan @@ -1304,10 +1284,17 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan + Reduce Output Operator + key expressions: _col3 (type: string) + sort order: + + Map-reduce partition columns: _col3 (type: string) + Statistics: Num rows: 687 Data size: 9924 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col4 (type: string) + TableScan alias: smalltbl2 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -1319,13 +1306,6 @@ STAGE PLANS: Map-reduce partition columns: value (type: string) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: string) - TableScan - Reduce Output Operator - key expressions: _col3 (type: string) - sort order: + - Map-reduce partition columns: _col3 (type: string) - Statistics: Num rows: 687 Data size: 9924 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col4 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1346,7 +1326,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-28 + Stage: Stage-29 Map Reduce Local Work Alias -> Map Local Tables: join3:join2:join1:bigtbl @@ -1366,7 +1346,7 @@ STAGE PLANS: 0 key1 (type: string) 1 key (type: string) - Stage: Stage-19 + Stage: Stage-20 Map Reduce Map Operator Tree: TableScan @@ -1395,7 +1375,7 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-5 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -1523,19 +1503,19 @@ POSTHOOK: Input: default@smalltbl2 POSTHOOK: Input: default@smalltbl3 POSTHOOK: Input: default@smalltbl4 #### A masked pattern was here #### -RUN: Stage-20:CONDITIONAL -RUN: Stage-27:MAPREDLOCAL -RUN: Stage-18:MAPRED -RUN: Stage-17:CONDITIONAL -RUN: Stage-25:MAPREDLOCAL -RUN: Stage-15:MAPRED -RUN: Stage-14:CONDITIONAL -RUN: Stage-23:MAPREDLOCAL -RUN: Stage-12:MAPRED -RUN: Stage-11:CONDITIONAL -RUN: Stage-21:MAPREDLOCAL -RUN: Stage-9:MAPRED -RUN: Stage-4:MAPRED +RUN: Stage-21:CONDITIONAL +RUN: Stage-28:MAPREDLOCAL +RUN: Stage-19:MAPRED +RUN: Stage-18:CONDITIONAL +RUN: Stage-26:MAPREDLOCAL +RUN: Stage-16:MAPRED +RUN: 
Stage-15:CONDITIONAL +RUN: Stage-24:MAPREDLOCAL +RUN: Stage-13:MAPRED +RUN: Stage-12:CONDITIONAL +RUN: Stage-22:MAPREDLOCAL +RUN: Stage-10:MAPRED +RUN: Stage-5:MAPRED 247580 247580 247580 247580 247580 247580 548662743780 548662743780 PREHOOK: query: -- Enable noconditionaltask and set the size of hive.auto.convert.join.noconditionaltask.size -- to 10000, which is large enough to fit all four small tables (smallTbl1 to smallTbl4). @@ -1610,12 +1590,12 @@ FROM (SELECT join2.key1 as key1, JOIN smallTbl4 ON (join3.key3 = smallTbl4.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-13 is a root stage - Stage-4 depends on stages: Stage-13 - Stage-0 depends on stages: Stage-4 + Stage-14 is a root stage + Stage-5 depends on stages: Stage-14 + Stage-0 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-13 + Stage: Stage-14 Map Reduce Local Work Alias -> Map Local Tables: join3:join2:join1:smalltbl1 @@ -1688,7 +1668,7 @@ STAGE PLANS: 0 _col2 (type: string) 1 key (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -1868,8 +1848,8 @@ POSTHOOK: Input: default@smalltbl2 POSTHOOK: Input: default@smalltbl3 POSTHOOK: Input: default@smalltbl4 #### A masked pattern was here #### -RUN: Stage-13:MAPREDLOCAL -RUN: Stage-4:MAPRED +RUN: Stage-14:MAPREDLOCAL +RUN: Stage-5:MAPRED 247580 247580 247580 247580 247580 247580 548662743780 548662743780 PREHOOK: query: -- Enable noconditionaltask and set the size of hive.auto.convert.join.noconditionaltask.size -- to 200, which is large enough to fit two small tables. We will have two jobs to evaluate this -- query. @@ -1946,14 +1926,14 @@ FROM (SELECT join2.key1 as key1, JOIN smallTbl4 ON (join3.key3 = smallTbl4.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-14 is a root stage - Stage-11 depends on stages: Stage-14 - Stage-13 depends on stages: Stage-11 - Stage-4 depends on stages: Stage-13 - Stage-0 depends on stages: Stage-4 + Stage-15 is a root stage + Stage-12 depends on stages: Stage-15 + Stage-14 depends on stages: Stage-12 + Stage-5 depends on stages: Stage-14 + Stage-0 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-14 + Stage: Stage-15 Map Reduce Local Work Alias -> Map Local Tables: join3:join2:join1:smalltbl1 @@ -1992,7 +1972,7 @@ STAGE PLANS: 0 _col3 (type: string) 1 value (type: string) - Stage: Stage-11 + Stage: Stage-12 Map Reduce Map Operator Tree: TableScan @@ -2040,7 +2020,7 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-13 + Stage: Stage-14 Map Reduce Local Work Alias -> Map Local Tables: join3:smalltbl3 @@ -2079,7 +2059,7 @@ STAGE PLANS: 0 _col2 (type: string) 1 key (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -2224,10 +2204,10 @@ POSTHOOK: Input: default@smalltbl2 POSTHOOK: Input: default@smalltbl3 POSTHOOK: Input: default@smalltbl4 #### A masked pattern was here #### +RUN: Stage-15:MAPREDLOCAL +RUN: Stage-12:MAPRED RUN: Stage-14:MAPREDLOCAL -RUN: Stage-11:MAPRED -RUN: Stage-13:MAPREDLOCAL -RUN: Stage-4:MAPRED +RUN: Stage-5:MAPRED 247580 247580 247580 247580 247580 247580 548662743780 548662743780 PREHOOK: query: -- Enable noconditionaltask but set the size of hive.auto.convert.join.noconditionaltask.size -- to 0. The plan will be the same as the one with a disabled noconditionaltask.
@@ -2300,38 +2280,38 @@ FROM (SELECT join2.key1 as key1, JOIN smallTbl4 ON (join3.key3 = smallTbl4.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-20 is a root stage , consists of Stage-27, Stage-28, Stage-5 - Stage-27 has a backup stage: Stage-5 - Stage-18 depends on stages: Stage-27 - Stage-17 depends on stages: Stage-5, Stage-18, Stage-19 , consists of Stage-25, Stage-26, Stage-1 - Stage-25 has a backup stage: Stage-1 - Stage-15 depends on stages: Stage-25 - Stage-14 depends on stages: Stage-1, Stage-15, Stage-16 , consists of Stage-23, Stage-24, Stage-2 - Stage-23 has a backup stage: Stage-2 - Stage-12 depends on stages: Stage-23 - Stage-11 depends on stages: Stage-2, Stage-12, Stage-13 , consists of Stage-21, Stage-22, Stage-3 - Stage-21 has a backup stage: Stage-3 - Stage-9 depends on stages: Stage-21 - Stage-4 depends on stages: Stage-3, Stage-9, Stage-10 - Stage-22 has a backup stage: Stage-3 + Stage-21 is a root stage , consists of Stage-28, Stage-29, Stage-1 + Stage-28 has a backup stage: Stage-1 + Stage-19 depends on stages: Stage-28 + Stage-18 depends on stages: Stage-1, Stage-19, Stage-20 , consists of Stage-26, Stage-27, Stage-2 + Stage-26 has a backup stage: Stage-2 + Stage-16 depends on stages: Stage-26 + Stage-15 depends on stages: Stage-2, Stage-16, Stage-17 , consists of Stage-24, Stage-25, Stage-3 + Stage-24 has a backup stage: Stage-3 + Stage-13 depends on stages: Stage-24 + Stage-12 depends on stages: Stage-3, Stage-13, Stage-14 , consists of Stage-22, Stage-23, Stage-4 + Stage-22 has a backup stage: Stage-4 Stage-10 depends on stages: Stage-22 + Stage-5 depends on stages: Stage-4, Stage-10, Stage-11 + Stage-23 has a backup stage: Stage-4 + Stage-11 depends on stages: Stage-23 + Stage-4 + Stage-25 has a backup stage: Stage-3 + Stage-14 depends on stages: Stage-25 Stage-3 - Stage-24 has a backup stage: Stage-2 - Stage-13 depends on stages: Stage-24 + Stage-27 has a backup stage: Stage-2 + Stage-17 depends on stages: Stage-27 Stage-2 - Stage-26 has a backup stage: Stage-1 - Stage-16 depends on stages: Stage-26 + Stage-29 has a backup stage: Stage-1 + Stage-20 depends on stages: Stage-29 Stage-1 - Stage-28 has a backup stage: Stage-5 - Stage-19 depends on stages: Stage-28 - Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-20 + Stage: Stage-21 Conditional Operator - Stage: Stage-27 + Stage: Stage-28 Map Reduce Local Work Alias -> Map Local Tables: join3:join2:join1:smalltbl1 @@ -2351,7 +2331,7 @@ STAGE PLANS: 0 key1 (type: string) 1 key (type: string) - Stage: Stage-18 + Stage: Stage-19 Map Reduce Map Operator Tree: TableScan @@ -2380,10 +2360,10 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-17 + Stage: Stage-18 Conditional Operator - Stage: Stage-25 + Stage: Stage-26 Map Reduce Local Work Alias -> Map Local Tables: join3:join2:smalltbl2 @@ -2403,7 +2383,7 @@ STAGE PLANS: 0 _col3 (type: string) 1 value (type: string) - Stage: Stage-15 + Stage: Stage-16 Map Reduce Map Operator Tree: TableScan @@ -2429,10 +2409,10 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-14 + Stage: Stage-15 Conditional Operator - Stage: Stage-23 + Stage: Stage-24 Map Reduce Local Work Alias -> Map Local Tables: join3:smalltbl3 @@ -2452,7 +2432,7 @@ STAGE PLANS: 0 _col1 (type: string) 1 key (type: string) - Stage: Stage-12 + Stage: Stage-13 Map Reduce Map Operator Tree: TableScan @@ -2478,10 +2458,10 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-11 + Stage: Stage-12 Conditional 
Operator - Stage: Stage-21 + Stage: Stage-22 Map Reduce Local Work Alias -> Map Local Tables: smalltbl4 @@ -2501,7 +2481,7 @@ STAGE PLANS: 0 _col2 (type: string) 1 key (type: string) - Stage: Stage-9 + Stage: Stage-10 Map Reduce Map Operator Tree: TableScan @@ -2531,7 +2511,7 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -2557,7 +2537,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-22 + Stage: Stage-23 Map Reduce Local Work Alias -> Map Local Tables: $INTNAME @@ -2574,7 +2554,7 @@ STAGE PLANS: 0 _col2 (type: string) 1 key (type: string) - Stage: Stage-10 + Stage: Stage-11 Map Reduce Map Operator Tree: TableScan @@ -2607,7 +2587,7 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-3 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -2653,7 +2633,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-24 + Stage: Stage-25 Map Reduce Local Work Alias -> Map Local Tables: join3:$INTNAME @@ -2670,7 +2650,7 @@ STAGE PLANS: 0 _col1 (type: string) 1 key (type: string) - Stage: Stage-13 + Stage: Stage-14 Map Reduce Map Operator Tree: TableScan @@ -2699,7 +2679,7 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -2740,7 +2720,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-26 + Stage: Stage-27 Map Reduce Local Work Alias -> Map Local Tables: join3:join2:$INTNAME @@ -2757,7 +2737,7 @@ STAGE PLANS: 0 _col3 (type: string) 1 value (type: string) - Stage: Stage-16 + Stage: Stage-17 Map Reduce Map Operator Tree: TableScan @@ -2786,10 +2766,17 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan + Reduce Output Operator + key expressions: _col3 (type: string) + sort order: + + Map-reduce partition columns: _col3 (type: string) + Statistics: Num rows: 687 Data size: 9924 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col4 (type: string) + TableScan alias: smalltbl2 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -2801,13 +2788,6 @@ STAGE PLANS: Map-reduce partition columns: value (type: string) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: string) - TableScan - Reduce Output Operator - key expressions: _col3 (type: string) - sort order: + - Map-reduce partition columns: _col3 (type: string) - Statistics: Num rows: 687 Data size: 9924 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col4 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -2828,7 +2808,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-28 + Stage: Stage-29 Map Reduce Local Work Alias -> Map Local Tables: join3:join2:join1:bigtbl @@ -2848,7 +2828,7 @@ STAGE PLANS: 0 key1 (type: string) 1 key (type: string) - Stage: Stage-19 + Stage: Stage-20 
Map Reduce Map Operator Tree: TableScan @@ -2877,7 +2857,7 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-5 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -3005,17 +2985,17 @@ POSTHOOK: Input: default@smalltbl2 POSTHOOK: Input: default@smalltbl3 POSTHOOK: Input: default@smalltbl4 #### A masked pattern was here #### -RUN: Stage-20:CONDITIONAL -RUN: Stage-27:MAPREDLOCAL -RUN: Stage-18:MAPRED -RUN: Stage-17:CONDITIONAL -RUN: Stage-25:MAPREDLOCAL -RUN: Stage-15:MAPRED -RUN: Stage-14:CONDITIONAL -RUN: Stage-23:MAPREDLOCAL -RUN: Stage-12:MAPRED -RUN: Stage-11:CONDITIONAL -RUN: Stage-21:MAPREDLOCAL -RUN: Stage-9:MAPRED -RUN: Stage-4:MAPRED +RUN: Stage-21:CONDITIONAL +RUN: Stage-28:MAPREDLOCAL +RUN: Stage-19:MAPRED +RUN: Stage-18:CONDITIONAL +RUN: Stage-26:MAPREDLOCAL +RUN: Stage-16:MAPRED +RUN: Stage-15:CONDITIONAL +RUN: Stage-24:MAPREDLOCAL +RUN: Stage-13:MAPRED +RUN: Stage-12:CONDITIONAL +RUN: Stage-22:MAPREDLOCAL +RUN: Stage-10:MAPRED +RUN: Stage-5:MAPRED 247580 247580 247580 247580 247580 247580 548662743780 548662743780 diff --git a/ql/src/test/results/clientpositive/multiMapJoin2.q.out b/ql/src/test/results/clientpositive/multiMapJoin2.q.out index ac3cd1e..5ed27d6 100644 --- a/ql/src/test/results/clientpositive/multiMapJoin2.q.out +++ b/ql/src/test/results/clientpositive/multiMapJoin2.q.out @@ -63,7 +63,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: x2 + alias: x1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -79,22 +79,14 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Union + Union + Statistics: Num rows: 550 Data size: 5842 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + Statistics: Num rows: 550 Data size: 5842 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 550 Data size: 5842 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 550 Data size: 5842 Basic stats: COMPLETE Column stats: NONE TableScan - alias: x1 + alias: x2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -110,20 +102,12 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Union + Union + Statistics: Num rows: 550 Data size: 5842 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + Statistics: Num rows: 550 Data size: 5842 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 550 Data size: 5842 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 550 Data 
size: 5842 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -278,13 +262,13 @@ STAGE PLANS: Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: - null-subquery1:tmp-subquery1:y1 + null-subquery2:tmp-subquery2:y2 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - null-subquery1:tmp-subquery1:y1 + null-subquery2:tmp-subquery2:y2 TableScan - alias: y1 + alias: y2 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -301,7 +285,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: x1 + alias: x2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -317,29 +301,25 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: - null-subquery2:tmp-subquery2:y2 + null-subquery1:tmp-subquery1:y1 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - null-subquery2:tmp-subquery2:y2 + null-subquery1:tmp-subquery1:y1 TableScan - alias: y2 + alias: y1 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -358,16 +338,12 @@ STAGE PLANS: TableScan Union Statistics: Num rows: 550 Data size: 5842 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + Statistics: Num rows: 550 Data size: 5842 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 550 Data size: 5842 Basic stats: COMPLETE Column stats: NONE TableScan - alias: x2 + alias: x1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -383,20 +359,12 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Union + Union + Statistics: Num rows: 550 Data size: 5842 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + Statistics: Num rows: 550 Data size: 5842 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 550 
Data size: 5842 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 550 Data size: 5842 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -537,13 +505,13 @@ FROM (SELECT x1.key AS key FROM src1 x1 GROUP BY x1.key ORDER BY tmp.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-6 depends on stages: Stage-4 + Stage-1 is a root stage + Stage-6 depends on stages: Stage-1 Stage-2 depends on stages: Stage-6 Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-4 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -569,16 +537,12 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-6 Map Reduce Local Work @@ -608,14 +572,10 @@ STAGE PLANS: TableScan Union Statistics: Num rows: 287 Data size: 3012 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + Statistics: Num rows: 287 Data size: 3012 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 287 Data size: 3012 Basic stats: COMPLETE Column stats: NONE TableScan alias: x2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -633,20 +593,12 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Union + Union + Statistics: Num rows: 287 Data size: 3012 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + Statistics: Num rows: 287 Data size: 3012 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 287 Data size: 3012 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 287 Data size: 3012 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -686,7 +638,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 #### A masked pattern was here #### -RUN: Stage-4:MAPRED +RUN: Stage-1:MAPRED RUN: Stage-6:MAPREDLOCAL RUN: Stage-2:MAPRED @@ -799,13 +751,13 @@ STAGE PLANS: Stage: Stage-17 Map Reduce Local Work Alias -> Map Local Tables: - tmp2:y2 + tmp1:y1 Fetch 
Operator limit: -1 Alias -> Map Local Operator Tree: - tmp2:y2 + tmp1:y1 TableScan - alias: y2 + alias: y1 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -822,7 +774,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: x2 + alias: x1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -838,20 +790,16 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) + Group By Operator + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -859,17 +807,13 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-12 Conditional Operator @@ -877,11 +821,11 @@ STAGE PLANS: Stage: Stage-15 Map Reduce Local Work Alias -> Map Local Tables: - $INTNAME + $INTNAME1 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $INTNAME + $INTNAME1 TableScan HashTable Sink Operator condition expressions: @@ -905,20 +849,17 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Group By Operator - aggregations: count() - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -930,7 +871,7 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator @@ -938,17 +879,13 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 75 Data size: 800 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 75 Data size: 800 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-5 Map Reduce @@ -957,15 +894,15 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ - Statistics: Num rows: 75 Data size: 800 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 75 Data size: 800 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 75 Data size: 800 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -974,11 +911,11 @@ STAGE PLANS: Stage: Stage-16 Map Reduce Local Work Alias -> Map Local Tables: - $INTNAME1 + $INTNAME Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $INTNAME1 + $INTNAME TableScan HashTable Sink Operator condition expressions: @@ -1002,20 +939,17 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Group By Operator - aggregations: count() - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -1027,13 +961,13 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE TableScan Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -1042,34 +976,30 @@ STAGE PLANS: 0 {KEY.reducesinkkey0} 1 outputColumnNames: _col0 - Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-18 Map Reduce Local Work Alias -> Map Local Tables: - tmp1:y1 + tmp2:y2 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - tmp1:y1 + tmp2:y2 TableScan - alias: y1 + alias: y2 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -1086,7 +1016,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: x1 + alias: x2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -1102,20 +1032,16 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) + Group By Operator + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key 
expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -1123,17 +1049,13 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator @@ -1275,7 +1197,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: x2 + alias: x1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -1291,22 +1213,18 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) + Group By Operator + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE TableScan - alias: x1 + alias: x2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -1322,20 +1240,16 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) + Group By Operator + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -1346,83 
+1260,59 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Mux Operator - Statistics: Num rows: 550 Data size: 5842 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 - outputColumnNames: _col0 + Mux Operator + Statistics: Num rows: 550 Data size: 5842 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Mux Operator Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Mux Operator - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Mux Operator - Statistics: Num rows: 550 Data size: 5842 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 - outputColumnNames: _col0 + Mux Operator + Statistics: Num rows: 550 Data size: 5842 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Mux Operator Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Mux Operator - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE 
Column stats: NONE - Group By Operator - aggregations: count() - keys: _col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Map Reduce @@ -1536,21 +1426,21 @@ GROUP BY tmp1.key ORDER BY key, cnt POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-7 is a root stage - Stage-10 depends on stages: Stage-2, Stage-7 , consists of Stage-12, Stage-13, Stage-3 - Stage-12 has a backup stage: Stage-3 + Stage-1 is a root stage + Stage-10 depends on stages: Stage-1, Stage-6 , consists of Stage-12, Stage-13, Stage-2 + Stage-12 has a backup stage: Stage-2 Stage-8 depends on stages: Stage-12 - Stage-4 depends on stages: Stage-3, Stage-8, Stage-9 - Stage-5 depends on stages: Stage-4 - Stage-13 has a backup stage: Stage-3 + Stage-3 depends on stages: Stage-2, Stage-8, Stage-9 + Stage-4 depends on stages: Stage-3 + Stage-13 has a backup stage: Stage-2 Stage-9 depends on stages: Stage-13 - Stage-3 + Stage-2 Stage-14 is a root stage - Stage-2 depends on stages: Stage-14 - Stage-0 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-14 + Stage-0 depends on stages: Stage-4 STAGE PLANS: - Stage: Stage-7 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -1559,36 +1449,28 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Group By Operator + keys: key (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: key (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 13 
Data size: 99 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-10 Conditional Operator @@ -1596,11 +1478,11 @@ STAGE PLANS: Stage: Stage-12 Map Reduce Local Work Alias -> Map Local Tables: - $INTNAME + $INTNAME1 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $INTNAME + $INTNAME1 TableScan HashTable Sink Operator condition expressions: @@ -1624,24 +1506,21 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Group By Operator - aggregations: count() - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -1649,7 +1528,7 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator @@ -1657,34 +1536,30 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 75 Data size: 800 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 75 Data size: 800 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-5 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ - Statistics: Num rows: 75 Data size: 800 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: bigint) outputColumnNames: _col0, _col1 - 
Statistics: Num rows: 75 Data size: 800 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 75 Data size: 800 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1693,11 +1568,11 @@ STAGE PLANS: Stage: Stage-13 Map Reduce Local Work Alias -> Map Local Tables: - $INTNAME1 + $INTNAME Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $INTNAME1 + $INTNAME TableScan HashTable Sink Operator condition expressions: @@ -1721,24 +1596,21 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Group By Operator - aggregations: count() - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work - Stage: Stage-3 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -1746,13 +1618,13 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE TableScan Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -1761,23 +1633,19 @@ STAGE PLANS: 0 {KEY.reducesinkkey0} 1 outputColumnNames: _col0 - Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 302 Data size: 3213 Basic stats: 
COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-14 Map Reduce Local Work @@ -1801,7 +1669,7 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) - Stage: Stage-2 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -1821,20 +1689,16 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) + Group By Operator + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -1842,17 +1706,13 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator @@ -1888,14 +1748,14 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 #### A masked pattern was here #### -RUN: Stage-7:MAPRED +RUN: Stage-1:MAPRED RUN: Stage-14:MAPREDLOCAL -RUN: Stage-2:MAPRED +RUN: Stage-6:MAPRED RUN: Stage-10:CONDITIONAL RUN: Stage-12:MAPREDLOCAL RUN: Stage-8:MAPRED +RUN: Stage-3:MAPRED RUN: Stage-4:MAPRED -RUN: Stage-5:MAPRED 128 1 146 1 150 1 @@ -1944,13 +1804,13 @@ GROUP BY tmp1.key ORDER BY key, cnt POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-7 is a root stage - Stage-2 depends on stages: Stage-7 - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 + Stage-6 is a root stage + Stage-1 depends on stages: Stage-6 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-7 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: tmp2:y2 @@ -1972,7 +1832,7 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -1981,20 +1841,16 @@ STAGE PLANS: Filter Operator predicate: key is not null 
(type: boolean) Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Group By Operator + keys: key (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: key (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE TableScan alias: x2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -2012,20 +1868,16 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) + Group By Operator + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -2036,85 +1888,61 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 144 Data size: 1510 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 144 Data size: 1510 Basic stats: COMPLETE Column stats: NONE - Mux Operator - Statistics: Num rows: 288 Data size: 3020 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 - outputColumnNames: _col0 + Mux Operator + Statistics: Num rows: 288 Data size: 3020 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Mux Operator Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Mux Operator - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - 
outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 144 Data size: 1510 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 144 Data size: 1510 Basic stats: COMPLETE Column stats: NONE - Mux Operator - Statistics: Num rows: 288 Data size: 3020 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 - outputColumnNames: _col0 + Mux Operator + Statistics: Num rows: 288 Data size: 3020 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Mux Operator Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Mux Operator - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -2169,9 +1997,9 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 #### A masked pattern was here #### -RUN: Stage-7:MAPREDLOCAL +RUN: Stage-6:MAPREDLOCAL +RUN: Stage-1:MAPRED RUN: Stage-2:MAPRED -RUN: Stage-3:MAPRED 128 1 146 1 150 1 @@ -2283,17 +2111,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Statistics: Num rows: 69 Data size: 698 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 69 Data size: 698 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: 
_col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -2368,21 +2194,21 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-17 is a root stage Stage-2 depends on stages: Stage-17 - Stage-16 depends on stages: Stage-2, Stage-7 + Stage-16 depends on stages: Stage-2, Stage-8 Stage-4 depends on stages: Stage-16 Stage-18 is a root stage - Stage-7 depends on stages: Stage-18 + Stage-8 depends on stages: Stage-18 Stage-0 depends on stages: Stage-4 STAGE PLANS: Stage: Stage-17 Map Reduce Local Work Alias -> Map Local Tables: - null-subquery2:x-subquery2:tmp:a + null-subquery1:x-subquery1:tmp:a Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - null-subquery2:x-subquery2:tmp:a + null-subquery1:x-subquery1:tmp:a TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -2417,20 +2243,16 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) + Group By Operator + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -2438,17 +2260,13 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-16 Map Reduce Local Work @@ -2503,20 +2321,20 @@ STAGE PLANS: 0 _col0 (type: string) 1 key (type: string) outputColumnNames: _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column 
stats: NONE Select Operator expressions: _col1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE Union - Statistics: Num rows: 550 Data size: 5842 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 604 Data size: 6426 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 550 Data size: 5842 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 604 Data size: 6426 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 550 Data size: 5842 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 604 Data size: 6426 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2532,20 +2350,20 @@ STAGE PLANS: 0 _col0 (type: string) 1 key (type: string) outputColumnNames: _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE Union - Statistics: Num rows: 550 Data size: 5842 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 604 Data size: 6426 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 550 Data size: 5842 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 604 Data size: 6426 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 550 Data size: 5842 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 604 Data size: 6426 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2556,11 +2374,11 @@ STAGE PLANS: Stage: Stage-18 Map Reduce Local Work Alias -> Map Local Tables: - null-subquery1:x-subquery1:tmp:a + null-subquery2:x-subquery2:tmp:a Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - null-subquery1:x-subquery1:tmp:a + null-subquery2:x-subquery2:tmp:a TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -2575,7 +2393,7 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) - Stage: Stage-7 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan @@ -2595,20 +2413,16 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) + Group By Operator + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: 
NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -2616,17 +2430,13 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator @@ -2661,7 +2471,7 @@ POSTHOOK: Input: default@src RUN: Stage-17:MAPREDLOCAL RUN: Stage-18:MAPREDLOCAL RUN: Stage-2:MAPRED -RUN: Stage-7:MAPRED +RUN: Stage-8:MAPRED RUN: Stage-16:MAPREDLOCAL RUN: Stage-4:MAPRED 0 diff --git a/ql/src/test/results/clientpositive/multi_insert.q.out b/ql/src/test/results/clientpositive/multi_insert.q.out index 2bc128b..ea2e554 100644 --- a/ql/src/test/results/clientpositive/multi_insert.q.out +++ b/ql/src/test/results/clientpositive/multi_insert.q.out @@ -757,15 +757,11 @@ STAGE PLANS: Filter Operator predicate: ((key < 10) or ((key > 10) and (key < 20))) (type: boolean) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Forward Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE @@ -777,18 +773,14 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -797,18 +789,14 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 Stage: Stage-0 Move Operator @@ -919,15 +907,11 @@ STAGE PLANS: Filter Operator predicate: ((key < 10) or ((key > 10) and (key < 20))) (type: boolean) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Forward Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE @@ -939,18 +923,14 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -959,18 +939,14 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE 
Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 Stage: Stage-8 Conditional Operator @@ -1149,15 +1125,11 @@ STAGE PLANS: Filter Operator predicate: ((key < 10) or ((key > 10) and (key < 20))) (type: boolean) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Forward Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE @@ -1169,18 +1141,14 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -1189,18 +1157,14 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat 
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 Stage: Stage-0 Move Operator @@ -1311,15 +1275,11 @@ STAGE PLANS: Filter Operator predicate: ((key < 10) or ((key > 10) and (key < 20))) (type: boolean) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Forward Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE @@ -1331,18 +1291,14 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -1351,18 +1307,14 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 Stage: Stage-8 Conditional Operator @@ -1547,33 +1499,25 @@ 
STAGE PLANS: Filter Operator predicate: (_col0 < 10) (type: boolean) Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -1586,33 +1530,25 @@ STAGE PLANS: Filter Operator predicate: (_col0 < 10) (type: boolean) Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 111 Data size: 1179 Basic stats: 
COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 Stage: Stage-0 Move Operator @@ -1755,33 +1691,25 @@ STAGE PLANS: Filter Operator predicate: (_col0 < 10) (type: boolean) Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -1794,33 +1722,25 @@ STAGE PLANS: Filter Operator predicate: (_col0 < 10) (type: boolean) Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 Stage: Stage-8 Conditional Operator @@ -2031,33 +1951,25 @@ STAGE PLANS: Filter Operator predicate: (_col0 < 10) (type: boolean) Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -2070,33 +1982,25 @@ STAGE PLANS: Filter Operator predicate: (_col0 < 10) (type: boolean) Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 333 Data size: 3537 Basic 
stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 Stage: Stage-0 Move Operator @@ -2239,33 +2143,25 @@ STAGE PLANS: Filter Operator predicate: (_col0 < 10) (type: boolean) Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -2278,33 +2174,25 @@ STAGE PLANS: Filter Operator predicate: (_col0 < 10) (type: boolean) Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 Stage: Stage-8 Conditional Operator diff --git a/ql/src/test/results/clientpositive/multi_insert_gby.q.out b/ql/src/test/results/clientpositive/multi_insert_gby.q.out index f10bac7..3c51f58 100644 --- a/ql/src/test/results/clientpositive/multi_insert_gby.q.out +++ b/ql/src/test/results/clientpositive/multi_insert_gby.q.out @@ -49,15 +49,11 @@ STAGE PLANS: Filter Operator predicate: ((key > 450) or (key > 500)) (type: boolean) Statistics: Num rows: 332 Data size: 3527 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) Statistics: Num rows: 332 Data size: 3527 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 332 Data size: 3527 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Forward Statistics: Num rows: 332 Data size: 3527 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/multi_insert_gby2.q.out b/ql/src/test/results/clientpositive/multi_insert_gby2.q.out index 9c20dbb..f4baf04 100644 --- a/ql/src/test/results/clientpositive/multi_insert_gby2.q.out +++ 
b/ql/src/test/results/clientpositive/multi_insert_gby2.q.out @@ -99,18 +99,14 @@ STAGE PLANS: mode: complete outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: double) - outputColumnNames: _col0 + File Output Operator + compressed: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.e2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.e2 Stage: Stage-0 Move Operator diff --git a/ql/src/test/results/clientpositive/multi_insert_gby3.q.out b/ql/src/test/results/clientpositive/multi_insert_gby3.q.out index db291e2..6ee003b 100644 --- a/ql/src/test/results/clientpositive/multi_insert_gby3.q.out +++ b/ql/src/test/results/clientpositive/multi_insert_gby3.q.out @@ -1775,38 +1775,30 @@ STAGE PLANS: expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: double), VALUE._col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: double), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Select Operator - expressions: _col0 (type: string), _col1 (type: double), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(DISTINCT _col1) - keys: _col0 (type: string), _col1 (type: double), _col2 (type: string) - mode: complete - outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Group By Operator + aggregations: count(DISTINCT _col1) + keys: _col0 (type: string), _col1 (type: double), _col2 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToDouble(_col3) (type: double) + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), UDFToDouble(_col3) (type: double) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 2656 Basic 
stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.e3 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.e3 Stage: Stage-4 Map Reduce diff --git a/ql/src/test/results/clientpositive/multi_insert_lateral_view.q.out b/ql/src/test/results/clientpositive/multi_insert_lateral_view.q.out index 0006ce9..121f78c 100644 --- a/ql/src/test/results/clientpositive/multi_insert_lateral_view.q.out +++ b/ql/src/test/results/clientpositive/multi_insert_lateral_view.q.out @@ -389,22 +389,18 @@ STAGE PLANS: Lateral View Join Operator outputColumnNames: _col0, _col5 Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col5 (type: double) - outputColumnNames: _col0, _col5 + Group By Operator + aggregations: sum(_col5) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col5) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) + value expressions: _col1 (type: double) Select Operator expressions: array((key + 1),(key + 2)) (type: array<double>) outputColumnNames: _col0 @@ -415,22 +411,18 @@ STAGE PLANS: Lateral View Join Operator outputColumnNames: _col0, _col5 Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col5 (type: double) - outputColumnNames: _col0, _col5 + Group By Operator + aggregations: sum(_col5) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col5) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) + value expressions: _col1 (type: double) Lateral View Forward Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -440,8 +432,26 @@ STAGE PLANS: Lateral View Join Operator outputColumnNames: _col0, _col5 Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col5 (type: double) + Group
By Operator + aggregations: sum(_col5) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Select Operator + expressions: array((key + 3),(key + 4)) (type: array<double>) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + UDTF Operator + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + function name: explode + Lateral View Join Operator outputColumnNames: _col0, _col5 Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -456,32 +466,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Select Operator - expressions: array((key + 3),(key + 4)) (type: array<double>) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - UDTF Operator - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - function name: explode - Lateral View Join Operator - outputColumnNames: _col0, _col5 - Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col5 (type: double) - outputColumnNames: _col0, _col5 - Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col5) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -489,18 +473,14 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: double) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_lv1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv1 Stage: Stage-0 Move Operator @@ -532,18 +512,14 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0
(type: string), _col1 (type: double) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_lv2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv2 Stage: Stage-1 Move Operator @@ -660,22 +636,18 @@ STAGE PLANS: Lateral View Join Operator outputColumnNames: _col0, _col5 Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col5 (type: double) - outputColumnNames: _col0, _col5 + Group By Operator + aggregations: sum(_col5) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col5) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) + value expressions: _col1 (type: double) Select Operator expressions: array((key + 1),(key + 2)) (type: array<double>) outputColumnNames: _col0 @@ -686,35 +658,27 @@ STAGE PLANS: Lateral View Join Operator outputColumnNames: _col0, _col5 Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col5 (type: double) - outputColumnNames: _col0, _col5 + Group By Operator + aggregations: sum(_col5) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col5) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) + value expressions: _col1 (type: double) Filter Operator predicate: ((key > 200) or (key < 200)) (type: boolean) Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input
format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -722,18 +686,14 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: double) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_lv1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv1 Stage: Stage-0 Move Operator @@ -770,18 +730,14 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_lv2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv2 Filter Operator predicate: (KEY._col0 < 200) (type: boolean) Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE @@ -791,18 +747,14 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_lv3 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv3 Stage: Stage-1 Move Operator @@ -1065,18 +1017,14 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: double), _col1 (type: double) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_lv1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv1 Stage: Stage-0 Move Operator @@ -1107,18 +1055,14 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: double), _col1 (type: double) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_lv2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv2 Stage: Stage-1 Move Operator @@ -1149,18 +1093,14 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: double) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_lv3 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv3 Stage: Stage-2 Move Operator @@ -1326,21 +1266,17 @@ STAGE PLANS: Lateral View Join Operator outputColumnNames: _col0, _col5 Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col5 (type: double) - outputColumnNames: _col0, _col5 + Group By Operator + aggregations: sum(DISTINCT _col5) + keys: _col0 (type: string), _col5 (type: double) + mode: hash + 
outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(DISTINCT _col5) - keys: _col0 (type: string), _col5 (type: double) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: double) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: double) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: array((key + 1),(key + 2)) (type: array<double>) outputColumnNames: _col0 @@ -1351,21 +1287,17 @@ STAGE PLANS: Lateral View Join Operator outputColumnNames: _col0, _col5 Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col5 (type: double) - outputColumnNames: _col0, _col5 + Group By Operator + aggregations: sum(DISTINCT _col5) + keys: _col0 (type: string), _col5 (type: double) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(DISTINCT _col5) - keys: _col0 (type: string), _col5 (type: double) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: double) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: double) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Lateral View Forward Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -1375,8 +1307,26 @@ STAGE PLANS: Lateral View Join Operator outputColumnNames: _col0, _col5 Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col5 (type: double) + Group By Operator + aggregations: sum(DISTINCT _col5) + keys: _col0 (type: string), _col5 (type: double) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Select Operator + expressions: array((key + 3),(key + 4)) (type: array<double>) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + UDTF Operator + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + function name: explode + Lateral View Join Operator outputColumnNames: _col0, _col5 Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -1391,45 +1341,15 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde:
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Select Operator - expressions: array((key + 3),(key + 4)) (type: array<double>) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - UDTF Operator - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - function name: explode - Lateral View Join Operator - outputColumnNames: _col0, _col5 - Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col5 (type: double) - outputColumnNames: _col0, _col5 - Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(DISTINCT _col5) - keys: _col0 (type: string), _col5 (type: double) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((key > 200) or (key < 200)) (type: boolean) Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Group By Operator aggregations: sum(DISTINCT KEY._col1:0._col0) @@ -1437,18 +1357,14 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: double) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_lv1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv1 Stage: Stage-0 Move Operator @@ -1479,18 +1395,14 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: double) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats:
NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_lv2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv2 Stage: Stage-1 Move Operator @@ -1526,18 +1438,14 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: double) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_lv3 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv3 Filter Operator predicate: (KEY._col1:0._col0 < 200) (type: boolean) Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE @@ -1547,18 +1455,14 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: double) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_lv4 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv4 Stage: Stage-2 Move Operator diff --git a/ql/src/test/results/clientpositive/multi_insert_mixed.q.out b/ql/src/test/results/clientpositive/multi_insert_mixed.q.out index 116d9e4..c1afde8 100644 --- a/ql/src/test/results/clientpositive/multi_insert_mixed.q.out +++ b/ql/src/test/results/clientpositive/multi_insert_mixed.q.out @@ -112,17 +112,13 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output 
format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-4 Map Reduce @@ -131,16 +127,16 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -176,17 +172,13 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-7 Map Reduce @@ -195,16 +187,16 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git 
a/ql/src/test/results/clientpositive/multi_insert_move_tasks_share_dependencies.q.out b/ql/src/test/results/clientpositive/multi_insert_move_tasks_share_dependencies.q.out index 580bacd..2060693 100644 --- a/ql/src/test/results/clientpositive/multi_insert_move_tasks_share_dependencies.q.out +++ b/ql/src/test/results/clientpositive/multi_insert_move_tasks_share_dependencies.q.out @@ -774,15 +774,11 @@ STAGE PLANS: Filter Operator predicate: ((key < 10) or ((key > 10) and (key < 20))) (type: boolean) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Forward Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE @@ -794,18 +790,14 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -814,18 +806,14 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 Stage: Stage-4 Dependency Collection @@ -940,15 +928,11 @@ STAGE PLANS: Filter Operator predicate: ((key 
< 10) or ((key > 10) and (key < 20))) (type: boolean) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Forward Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE @@ -960,18 +944,14 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -980,18 +960,14 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 Stage: Stage-9 Conditional Operator @@ -1174,15 +1150,11 @@ STAGE PLANS: Filter Operator predicate: ((key < 10) or ((key > 10) and (key < 20))) (type: boolean) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data 
size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Forward Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE @@ -1194,18 +1166,14 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -1214,18 +1182,14 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 Stage: Stage-4 Dependency Collection @@ -1340,15 +1304,11 @@ STAGE PLANS: Filter Operator predicate: ((key < 10) or ((key > 10) and (key < 20))) (type: boolean) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Forward Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE @@ -1360,18 
+1320,14 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -1380,18 +1336,14 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 Stage: Stage-9 Conditional Operator @@ -1580,33 +1532,25 @@ STAGE PLANS: Filter Operator predicate: (_col0 < 10) (type: boolean) Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - File 
Output Operator - compressed: false - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -1619,33 +1563,25 @@ STAGE PLANS: Filter Operator predicate: (_col0 < 10) (type: boolean) Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 Stage: Stage-4 Dependency Collection @@ -1792,33 +1728,25 @@ STAGE PLANS: Filter Operator predicate: (_col0 < 10) (type: boolean) Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -1831,33 +1759,25 @@ STAGE PLANS: Filter Operator predicate: (_col0 < 10) (type: boolean) Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 Stage: Stage-9 Conditional Operator @@ -2072,33 +1992,25 @@ STAGE PLANS: Filter Operator predicate: (_col0 < 10) (type: boolean) Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + 
compressed: false Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -2111,33 +2023,25 @@ STAGE PLANS: Filter Operator predicate: (_col0 < 10) (type: boolean) Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + table: + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 Stage: Stage-4 Dependency Collection @@ -2284,33 +2188,25 @@ STAGE PLANS: Filter Operator predicate: (_col0 < 10) (type: boolean) Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -2323,33 +2219,25 @@ STAGE PLANS: Filter Operator predicate: (_col0 < 10) (type: boolean) Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) 
- outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 Stage: Stage-9 Conditional Operator @@ -2949,15 +2837,11 @@ STAGE PLANS: Filter Operator predicate: ((key < 10) or ((key > 10) and (key < 20))) (type: boolean) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Forward Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE @@ -2968,17 +2852,13 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -2986,17 +2866,13 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Map Reduce @@ -3006,16 +2882,16 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3038,16 +2914,16 @@ STAGE PLANS: key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3097,15 +2973,11 @@ STAGE PLANS: Filter Operator predicate: ((key < 10) or ((key > 10) and (key < 20))) (type: boolean) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Forward Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE @@ -3116,17 +2988,13 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic 
stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -3134,17 +3002,13 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Map Reduce @@ -3154,16 +3018,16 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3186,16 +3050,16 @@ STAGE PLANS: key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data 
size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3245,15 +3109,11 @@ STAGE PLANS: Filter Operator predicate: ((key < 10) or ((key > 10) and (key < 20))) (type: boolean) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Forward Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE @@ -3264,17 +3124,13 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -3282,17 +3138,13 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + 
serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Map Reduce @@ -3302,16 +3154,16 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3334,16 +3186,16 @@ STAGE PLANS: key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3393,15 +3245,11 @@ STAGE PLANS: Filter Operator predicate: ((key < 10) or ((key > 10) and (key < 20))) (type: boolean) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Forward Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE @@ -3412,17 +3260,13 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -3430,17 +3274,13 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Map Reduce @@ -3450,16 +3290,16 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3482,16 +3322,16 @@ STAGE PLANS: key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 
254 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3579,15 +3419,11 @@ STAGE PLANS: Filter Operator predicate: ((key < 10) or ((key > 10) and (key < 20))) (type: boolean) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Forward Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE @@ -3598,17 +3434,13 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -3616,17 +3448,13 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-6 Dependency Collection @@ -3665,16 +3493,16 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 36 Data size: 382 Basic stats: 
COMPLETE Column stats: NONE + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3694,16 +3522,16 @@ STAGE PLANS: key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3838,15 +3666,11 @@ STAGE PLANS: Filter Operator predicate: ((key < 10) or ((key > 10) and (key < 20))) (type: boolean) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Forward Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE @@ -3857,17 +3681,13 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: 
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -3875,17 +3695,13 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-6 Dependency Collection @@ -3924,16 +3740,16 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3953,16 +3769,16 @@ STAGE PLANS: key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -4107,15 +3923,11 @@ STAGE PLANS: Filter Operator predicate: ((key < 10) or ((key > 10) and (key < 20))) (type: boolean) Statistics: 
Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Forward Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE @@ -4126,17 +3938,13 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -4144,17 +3952,13 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-11 Conditional Operator @@ -4271,16 +4075,16 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num 
rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -4300,16 +4104,16 @@ STAGE PLANS: key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -4454,15 +4258,11 @@ STAGE PLANS: Filter Operator predicate: ((key < 10) or ((key > 10) and (key < 20))) (type: boolean) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Forward Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE @@ -4473,17 +4273,13 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 
Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -4491,17 +4287,13 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-11 Conditional Operator @@ -4618,16 +4410,16 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -4647,16 +4439,16 @@ STAGE PLANS: key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git a/ql/src/test/results/clientpositive/multi_join_union.q.out b/ql/src/test/results/clientpositive/multi_join_union.q.out index 1dbe37e..339aaf0 100644 --- a/ql/src/test/results/clientpositive/multi_join_union.q.out +++ b/ql/src/test/results/clientpositive/multi_join_union.q.out @@ -53,12 +53,12 @@ src12 b ON (a.key = b.key) JOIN (SELECT * FROM (SELECT * FROM src13 UNION ALL SELECT * FROM src14)a )c ON c.value = b.value POSTHOOK: type: 
QUERY STAGE DEPENDENCIES: - Stage-8 is a root stage - Stage-6 depends on stages: Stage-8 - Stage-0 depends on stages: Stage-6 + Stage-7 is a root stage + Stage-5 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-8 + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: a @@ -98,17 +98,13 @@ STAGE PLANS: Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Union Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} {_col5} {_col6} - 1 {_col0} - keys: - 0 _col6 (type: string) - 1 _col1 (type: string) + HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} {_col5} {_col6} + 1 {_col0} + keys: + 0 _col6 (type: string) + 1 _col1 (type: string) c-subquery2:a-subquery2:src14 TableScan alias: src14 @@ -122,19 +118,15 @@ STAGE PLANS: Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Union Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} {_col5} {_col6} - 1 {_col0} - keys: - 0 _col6 (type: string) - 1 _col1 (type: string) + HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} {_col5} {_col6} + 1 {_col0} + keys: + 0 _col6 (type: string) + 1 _col1 (type: string) - Stage: Stage-6 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan diff --git a/ql/src/test/results/clientpositive/no_hooks.q.out b/ql/src/test/results/clientpositive/no_hooks.q.out index 0bd96ff..70e3e50 100644 --- a/ql/src/test/results/clientpositive/no_hooks.q.out +++ b/ql/src/test/results/clientpositive/no_hooks.q.out @@ -12,7 +12,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key < 10) (type: boolean) @@ -22,7 +22,7 @@ STAGE PLANS: Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: string), value (type: string) TableScan - alias: src1 + alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key < 10) (type: boolean) diff --git a/ql/src/test/results/clientpositive/nonblock_op_deduplicate.q.out b/ql/src/test/results/clientpositive/nonblock_op_deduplicate.q.out index b69e1ae..d80e492 100644 --- a/ql/src/test/results/clientpositive/nonblock_op_deduplicate.q.out +++ b/ql/src/test/results/clientpositive/nonblock_op_deduplicate.q.out @@ -41,7 +41,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[11][tables = [tmp2, tmp3]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[11][tables = [tmp2, tmp3]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: -- This test query is introduced for HIVE-4968. -- First, we do not convert the join to MapJoin. 
EXPLAIN @@ -67,12 +67,12 @@ FROM (SELECT tmp2.key as key, tmp2.value as value, tmp3.count as count ) tmp4 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -95,26 +95,17 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - TableScan alias: src1 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -125,6 +116,11 @@ STAGE PLANS: sort order: Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string) + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Join Operator condition map: @@ -152,7 +148,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[11][tables = [tmp2, tmp3]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[11][tables = [tmp2, tmp3]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: SELECT tmp4.key as key, tmp4.value as value, tmp4.count as count FROM (SELECT tmp2.key as key, tmp2.value as value, tmp3.count as count FROM (SELECT * @@ -200,7 +196,7 @@ POSTHOOK: Input: default@src1 406 val_406 25 66 val_66 25 98 val_98 25 -Warning: Map Join MAPJOIN[17][bigTable=?] in task 'Stage-4:MAPRED' is a cross product +Warning: Map Join MAPJOIN[17][bigTable=?] in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: -- Then, we convert the join to MapJoin. 
EXPLAIN SELECT tmp4.key as key, tmp4.value as value, tmp4.count as count @@ -224,13 +220,13 @@ FROM (SELECT tmp2.key as key, tmp2.value as value, tmp3.count as count ) tmp4 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-5 depends on stages: Stage-1 - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-2 is a root stage + Stage-4 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -253,18 +249,14 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-5 + Stage: Stage-4 Map Reduce Local Work Alias -> Map Local Tables: tmp4:tmp2:tmp1:src1 @@ -287,7 +279,7 @@ STAGE PLANS: 0 1 - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -322,7 +314,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[17][bigTable=?] in task 'Stage-4:MAPRED' is a cross product +Warning: Map Join MAPJOIN[17][bigTable=?] 
in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: SELECT tmp4.key as key, tmp4.value as value, tmp4.count as count FROM (SELECT tmp2.key as key, tmp2.value as value, tmp3.count as count FROM (SELECT * diff --git a/ql/src/test/results/clientpositive/nonmr_fetch.q.out b/ql/src/test/results/clientpositive/nonmr_fetch.q.out index c6a4318..e303b34 100644 --- a/ql/src/test/results/clientpositive/nonmr_fetch.q.out +++ b/ql/src/test/results/clientpositive/nonmr_fetch.q.out @@ -1067,7 +1067,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: src2 + alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -1079,7 +1079,7 @@ STAGE PLANS: Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) TableScan - alias: src + alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) diff --git a/ql/src/test/results/clientpositive/notable_alias1.q.out b/ql/src/test/results/clientpositive/notable_alias1.q.out index ab5141d..2a40bdd 100644 --- a/ql/src/test/results/clientpositive/notable_alias1.q.out +++ b/ql/src/test/results/clientpositive/notable_alias1.q.out @@ -29,22 +29,18 @@ STAGE PLANS: Filter Operator predicate: (key < 100) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Group By Operator + aggregations: count(1) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/notable_alias2.q.out b/ql/src/test/results/clientpositive/notable_alias2.q.out index c75d319..4139906 100644 --- a/ql/src/test/results/clientpositive/notable_alias2.q.out +++ b/ql/src/test/results/clientpositive/notable_alias2.q.out @@ -29,22 +29,18 @@ STAGE PLANS: Filter Operator predicate: (key < 100) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Group By Operator + aggregations: count(1) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE 
Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/nullgroup2.q.out b/ql/src/test/results/clientpositive/nullgroup2.q.out index cb92904..802650a 100644 --- a/ql/src/test/results/clientpositive/nullgroup2.q.out +++ b/ql/src/test/results/clientpositive/nullgroup2.q.out @@ -19,22 +19,18 @@ STAGE PLANS: Filter Operator predicate: (key > 9999) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Group By Operator + aggregations: count(1) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -112,22 +108,18 @@ STAGE PLANS: Filter Operator predicate: (key > 9999) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Group By Operator + aggregations: count(1) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -182,15 +174,11 @@ STAGE PLANS: Filter Operator predicate: (key > 9999) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce 
partition columns: rand() (type: double) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator aggregations: count(1) @@ -268,15 +256,11 @@ STAGE PLANS: Filter Operator predicate: (key > 9999) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator aggregations: count(1) diff --git a/ql/src/test/results/clientpositive/optimize_nullscan.q.out b/ql/src/test/results/clientpositive/optimize_nullscan.q.out index 4e3640e..ec8387f 100644 --- a/ql/src/test/results/clientpositive/optimize_nullscan.q.out +++ b/ql/src/test/results/clientpositive/optimize_nullscan.q.out @@ -757,6 +757,209 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: false (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + aggregations: count(key) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Path -> Alias: + -mr-10004default.src{} [null-subquery1:_u1-subquery1:src] + Path -> Partition: + -mr-10004default.src{} + Partition + base file name: src + input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.NullStructSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.NullStructSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + -mr-10004default.src{} [null-subquery1:_u1-subquery1:src] + Needs Tagging: false + Reduce 
Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Union + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + TableScan + GatherStats: false + Union + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + serialization.lib 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe +#### A masked pattern was here #### + Partition + base file name: -mr-10003 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan alias: srcpart Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE GatherStats: false @@ -974,238 +1177,23 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0 - columns.types bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - Union - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types bigint - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - TableScan + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 GatherStats: false - Union - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types bigint - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10002 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0 - columns.types bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0 - columns.types bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe -#### A masked pattern was here #### - Partition - base file name: -mr-10003 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0 - columns.types bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0 - columns.types bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: false (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count(key) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: 
-1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Path -> Alias: - -mr-10004default.src{} [null-subquery1:_u1-subquery1:src] - Path -> Partition: - -mr-10004default.src{} - Partition - base file name: src - input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments defaultdefault - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.NullStructSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.NullStructSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments defaultdefault - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated Path -> Alias: - -mr-10004default.src{} [null-subquery1:_u1-subquery1:src] - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0 - columns.types bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -1763,12 +1751,12 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-2 depends on stages: Stage-3 Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -1845,27 +1833,23 @@ STAGE PLANS: 1 outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0 - 
columns.types string - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Map Reduce @@ -2075,7 +2059,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: s2 + alias: s1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator @@ -2087,11 +2071,11 @@ STAGE PLANS: sort order: + Map-reduce partition columns: value (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - tag: 1 + tag: 0 value expressions: key (type: string) auto parallelism: false TableScan - alias: s1 + alias: s2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator @@ -2103,11 +2087,11 @@ STAGE PLANS: sort order: + Map-reduce partition columns: value (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - tag: 0 + tag: 1 value expressions: key (type: string) auto parallelism: false Path -> Alias: - -mr-10003default.src{} [s2, s1] + -mr-10003default.src{} [s1, s2] Path -> Partition: -mr-10003default.src{} Partition @@ -2154,7 +2138,7 @@ STAGE PLANS: name: default.src name: default.src Truncated Path -> Alias: - -mr-10003default.src{} [s2, s1] + -mr-10003default.src{} [s1, s2] Needs Tagging: true Reduce Operator Tree: Join Operator diff --git a/ql/src/test/results/clientpositive/optional_outer.q.out b/ql/src/test/results/clientpositive/optional_outer.q.out index f1b4aff..a38bee8 100644 --- a/ql/src/test/results/clientpositive/optional_outer.q.out +++ b/ql/src/test/results/clientpositive/optional_outer.q.out @@ -11,7 +11,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -20,7 +20,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -68,7 +68,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -77,7 +77,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -125,7 +125,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b + alias: a 
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -134,7 +134,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -182,7 +182,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -191,7 +191,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -239,7 +239,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -248,7 +248,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -296,7 +296,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -305,7 +305,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) diff --git a/ql/src/test/results/clientpositive/orc_predicate_pushdown.q.out b/ql/src/test/results/clientpositive/orc_predicate_pushdown.q.out index 9a7b2b7..91fd38f 100644 --- a/ql/src/test/results/clientpositive/orc_predicate_pushdown.q.out +++ b/ql/src/test/results/clientpositive/orc_predicate_pushdown.q.out @@ -323,19 +323,15 @@ STAGE PLANS: Filter Operator predicate: ((t < 0) and (t > -2)) (type: boolean) Statistics: Num rows: 116 Data size: 34409 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: t (type: tinyint) - outputColumnNames: t - Statistics: Num rows: 116 Data size: 34409 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(t)) - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: sum(hash(t)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -385,19 +381,15 @@ STAGE PLANS: Filter Operator predicate: ((t < 0) and 
(t > -2)) (type: boolean) Statistics: Num rows: 116 Data size: 34409 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: t (type: tinyint) - outputColumnNames: t - Statistics: Num rows: 116 Data size: 34409 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(t)) - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: sum(hash(t)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/outer_join_ppr.q.out b/ql/src/test/results/clientpositive/outer_join_ppr.q.out index 58f8ea2..93cfde2 100644 --- a/ql/src/test/results/clientpositive/outer_join_ppr.q.out +++ b/ql/src/test/results/clientpositive/outer_join_ppr.q.out @@ -113,28 +113,28 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string), ds (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) auto parallelism: false TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE GatherStats: false Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string), ds (type: string) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -577,28 +577,28 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string), ds (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) auto parallelism: false TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE 
Column stats: NONE GatherStats: false Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string), ds (type: string) auto parallelism: false Path -> Alias: #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/parallel.q.out b/ql/src/test/results/clientpositive/parallel.q.out index f6c0f65..1ab6a01 100644 --- a/ql/src/test/results/clientpositive/parallel.q.out +++ b/ql/src/test/results/clientpositive/parallel.q.out @@ -62,17 +62,13 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Map Reduce @@ -82,44 +78,36 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Forward - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_a + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 
name: default.src_a Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_b + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_b Stage: Stage-0 Move Operator diff --git a/ql/src/test/results/clientpositive/parallel_join0.q.out b/ql/src/test/results/clientpositive/parallel_join0.q.out index 9fdbe18..5237784 100644 --- a/ql/src/test/results/clientpositive/parallel_join0.q.out +++ b/ql/src/test/results/clientpositive/parallel_join0.q.out @@ -65,16 +65,12 @@ STAGE PLANS: 1 {VALUE._col0} {VALUE._col1} outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -120,7 +116,7 @@ SELECT src1.key as k1, src1.value as v1, (SELECT * FROM src WHERE src.key < 10) src2 SORT BY k1, v1, k2, v2 POSTHOOK: type: QUERY -{"STAGE PLANS":{"Stage-2":{"Map Reduce":{"Reduce Operator Tree:":{"Select Operator":{"expressions:":"KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string)","outputColumnNames:":["_col0","_col1","_col2","_col3"],"children":{"File Output Operator":{"Statistics:":"Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE","compressed:":"false","table:":{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}}},"Statistics:":"Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE"}},"Map Operator Tree:":[{"TableScan":{"children":{"Reduce Output Operator":{"sort order:":"++++","Statistics:":"Num rows: 182 Data size: 1939 Basic stats: 
COMPLETE Column stats: NONE","key expressions:":"_col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)"}}}}]}},"Stage-1":{"Map Reduce":{"Reduce Operator Tree:":{"Join Operator":{"outputColumnNames:":["_col0","_col1","_col2","_col3"],"children":{"Select Operator":{"expressions:":"_col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)","outputColumnNames:":["_col0","_col1","_col2","_col3"],"children":{"File Output Operator":{"compressed:":"false","table:":{"serde:":"org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe","input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat"}}},"Statistics:":"Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE"}},"Statistics:":"Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE","condition map:":[{"":"Inner Join 0 to 1"}],"condition expressions:":{"1":"{VALUE._col0} {VALUE._col1}","0":"{VALUE._col0} {VALUE._col1}"}}},"Map Operator Tree:":[{"TableScan":{"alias:":"src","children":{"Filter Operator":{"predicate:":"(key < 10) (type: boolean)","children":{"Select Operator":{"expressions:":"key (type: string), value (type: string)","outputColumnNames:":["_col0","_col1"],"children":{"Reduce Output Operator":{"sort order:":"","value expressions:":"_col0 (type: string), _col1 (type: string)","Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE"}},"Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE"}},"Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE"}},"Statistics:":"Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE"}},{"TableScan":{"alias:":"src","children":{"Filter Operator":{"predicate:":"(key < 10) (type: boolean)","children":{"Select Operator":{"expressions:":"key (type: string), value (type: string)","outputColumnNames:":["_col0","_col1"],"children":{"Reduce Output Operator":{"sort order:":"","value expressions:":"_col0 (type: string), _col1 (type: string)","Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE"}},"Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE"}},"Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE"}},"Statistics:":"Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE"}}]}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{}}}}},"STAGE DEPENDENCIES":{"Stage-2":{"DEPENDENT STAGES":"Stage-1"},"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-0":{"DEPENDENT STAGES":"Stage-2"}}} +{"STAGE PLANS":{"Stage-2":{"Map Reduce":{"Reduce Operator Tree:":{"Select Operator":{"expressions:":"KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string)","outputColumnNames:":["_col0","_col1","_col2","_col3"],"children":{"File Output Operator":{"Statistics:":"Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE","compressed:":"false","table:":{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}}},"Statistics:":"Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE"}},"Map Operator Tree:":[{"TableScan":{"children":{"Reduce Output Operator":{"sort 
order:":"++++","Statistics:":"Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE","key expressions:":"_col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)"}}}}]}},"Stage-1":{"Map Reduce":{"Reduce Operator Tree:":{"Join Operator":{"outputColumnNames:":["_col0","_col1","_col2","_col3"],"children":{"File Output Operator":{"compressed:":"false","table:":{"serde:":"org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe","input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat"}}},"Statistics:":"Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE","condition map:":[{"":"Inner Join 0 to 1"}],"condition expressions:":{"1":"{VALUE._col0} {VALUE._col1}","0":"{VALUE._col0} {VALUE._col1}"}}},"Map Operator Tree:":[{"TableScan":{"alias:":"src","children":{"Filter Operator":{"predicate:":"(key < 10) (type: boolean)","children":{"Select Operator":{"expressions:":"key (type: string), value (type: string)","outputColumnNames:":["_col0","_col1"],"children":{"Reduce Output Operator":{"sort order:":"","value expressions:":"_col0 (type: string), _col1 (type: string)","Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE"}},"Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE"}},"Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE"}},"Statistics:":"Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE"}},{"TableScan":{"alias:":"src","children":{"Filter Operator":{"predicate:":"(key < 10) (type: boolean)","children":{"Select Operator":{"expressions:":"key (type: string), value (type: string)","outputColumnNames:":["_col0","_col1"],"children":{"Reduce Output Operator":{"sort order:":"","value expressions:":"_col0 (type: string), _col1 (type: string)","Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE"}},"Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE"}},"Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE"}},"Statistics:":"Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE"}}]}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{}}}}},"STAGE DEPENDENCIES":{"Stage-2":{"DEPENDENT STAGES":"Stage-1"},"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-0":{"DEPENDENT STAGES":"Stage-2"}}} Warning: Shuffle Join JOIN[8][tables = [src1, src2]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 FROM @@ -142,101 +138,101 @@ POSTHOOK: Input: default@src #### A masked pattern was here #### 0 val_0 0 val_0 0 val_0 0 val_0 +0 val_0 2 val_2 0 val_0 4 val_4 0 val_0 5 val_5 0 val_0 5 val_5 0 val_0 8 val_8 -0 val_0 9 val_9 -2 val_2 0 val_0 +2 val_2 2 val_2 2 val_2 5 val_5 4 val_4 0 val_0 -4 val_4 2 val_2 4 val_4 4 val_4 4 val_4 5 val_5 -4 val_4 5 val_5 4 val_4 9 val_9 5 val_5 0 val_0 5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 2 val_2 -5 val_5 2 val_2 5 val_5 4 val_4 5 val_5 5 val_5 5 val_5 8 val_8 +5 val_5 8 val_8 5 val_5 9 val_9 5 val_5 9 val_9 8 val_8 0 val_0 -8 val_8 4 val_4 8 val_8 5 val_5 -8 val_8 8 val_8 +8 val_8 5 val_5 +8 val_8 5 val_5 +9 val_9 0 val_0 +9 val_9 0 val_0 +9 val_9 0 val_0 +9 val_9 2 val_2 9 val_9 5 val_5 9 val_9 5 val_5 0 val_0 0 val_0 0 val_0 0 val_0 -0 val_0 4 val_4 -0 val_0 4 val_4 +0 val_0 0 val_0 +0 val_0 2 val_2 +0 
val_0 5 val_5 0 val_0 5 val_5 0 val_0 8 val_8 +0 val_0 9 val_9 2 val_2 0 val_0 +2 val_2 5 val_5 +4 val_4 5 val_5 5 val_5 0 val_0 -5 val_5 0 val_0 -5 val_5 4 val_4 +5 val_5 2 val_2 +5 val_5 2 val_2 5 val_5 4 val_4 5 val_5 5 val_5 5 val_5 5 val_5 -5 val_5 5 val_5 +5 val_5 8 val_8 +5 val_5 9 val_9 8 val_8 0 val_0 -8 val_8 2 val_2 -8 val_8 5 val_5 -9 val_9 2 val_2 -9 val_9 9 val_9 -0 val_0 0 val_0 -0 val_0 0 val_0 0 val_0 0 val_0 0 val_0 2 val_2 +0 val_0 4 val_4 +0 val_0 4 val_4 0 val_0 5 val_5 0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 -0 val_0 5 val_5 +0 val_0 8 val_8 +0 val_0 9 val_9 0 val_0 9 val_9 2 val_2 0 val_0 +2 val_2 0 val_0 2 val_2 4 val_4 -2 val_2 5 val_5 -2 val_2 5 val_5 2 val_2 8 val_8 +2 val_2 9 val_9 4 val_4 0 val_0 -4 val_4 0 val_0 -4 val_4 5 val_5 4 val_4 8 val_8 5 val_5 0 val_0 5 val_5 0 val_0 +5 val_5 0 val_0 +5 val_5 2 val_2 +5 val_5 4 val_4 +5 val_5 5 val_5 5 val_5 5 val_5 5 val_5 5 val_5 -5 val_5 8 val_8 8 val_8 0 val_0 -8 val_8 5 val_5 -9 val_9 0 val_0 -9 val_9 0 val_0 +8 val_8 4 val_4 +9 val_9 4 val_4 9 val_9 5 val_5 9 val_9 8 val_8 +9 val_9 9 val_9 +0 val_0 0 val_0 0 val_0 0 val_0 0 val_0 0 val_0 -0 val_0 2 val_2 -0 val_0 2 val_2 0 val_0 5 val_5 -0 val_0 8 val_8 -0 val_0 9 val_9 -2 val_2 2 val_2 -2 val_2 9 val_9 +0 val_0 5 val_5 +0 val_0 5 val_5 +2 val_2 5 val_5 +4 val_4 0 val_0 +4 val_4 2 val_2 +4 val_4 5 val_5 +5 val_5 0 val_0 5 val_5 0 val_0 5 val_5 0 val_0 -5 val_5 2 val_2 5 val_5 5 val_5 5 val_5 5 val_5 5 val_5 5 val_5 -5 val_5 8 val_8 -5 val_5 9 val_9 +8 val_8 2 val_2 +8 val_8 8 val_8 8 val_8 9 val_9 -9 val_9 0 val_0 -9 val_9 4 val_4 diff --git a/ql/src/test/results/clientpositive/parallel_join1.q.out b/ql/src/test/results/clientpositive/parallel_join1.q.out index 16b263c..a346a01 100644 --- a/ql/src/test/results/clientpositive/parallel_join1.q.out +++ b/ql/src/test/results/clientpositive/parallel_join1.q.out @@ -28,7 +28,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -38,9 +38,8 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) TableScan - alias: src1 + alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -50,6 +49,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Reduce Operator Tree: Join Operator condition map: diff --git a/ql/src/test/results/clientpositive/parquet_array_null_element.q.out b/ql/src/test/results/clientpositive/parquet_array_null_element.q.out new file mode 100644 index 0000000..c384387 --- /dev/null +++ b/ql/src/test/results/clientpositive/parquet_array_null_element.q.out @@ -0,0 +1,160 @@ +PREHOOK: query: DROP TABLE parquet_array_null_element_staging +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE parquet_array_null_element_staging +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE parquet_array_null_element +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE parquet_array_null_element +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE parquet_array_null_element_staging ( + id int, + lstint ARRAY<int>, + lststr ARRAY<string>, + mp MAP<string,string> +)
ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +COLLECTION ITEMS TERMINATED BY ',' +MAP KEYS TERMINATED BY ':' +NULL DEFINED AS '' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_array_null_element_staging +POSTHOOK: query: CREATE TABLE parquet_array_null_element_staging ( + id int, + lstint ARRAY<int>, + lststr ARRAY<string>, + mp MAP<string,string> +) ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +COLLECTION ITEMS TERMINATED BY ',' +MAP KEYS TERMINATED BY ':' +NULL DEFINED AS '' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_array_null_element_staging +PREHOOK: query: CREATE TABLE parquet_array_null_element ( + id int, + lstint ARRAY<int>, + lststr ARRAY<string>, + mp MAP<string,string> +) STORED AS PARQUET +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_array_null_element +POSTHOOK: query: CREATE TABLE parquet_array_null_element ( + id int, + lstint ARRAY<int>, + lststr ARRAY<string>, + mp MAP<string,string> +) STORED AS PARQUET +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_array_null_element +PREHOOK: query: DESCRIBE FORMATTED parquet_array_null_element +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@parquet_array_null_element +POSTHOOK: query: DESCRIBE FORMATTED parquet_array_null_element +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@parquet_array_null_element +# col_name data_type comment + +id int +lstint array<int> +lststr array<string> +mp map<string,string> + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Protect Mode: None +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe +InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/parquet_array_null_element.txt' OVERWRITE INTO TABLE parquet_array_null_element_staging +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@parquet_array_null_element_staging +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/parquet_array_null_element.txt' OVERWRITE INTO TABLE parquet_array_null_element_staging +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@parquet_array_null_element_staging +PREHOOK: query: SELECT * FROM parquet_array_null_element_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_array_null_element_staging +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_array_null_element_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_array_null_element_staging +#### A masked pattern was here #### +1 [null,7] ["CARRELAGE","MOQUETTE"] {"key11":"value11","key12":"value12","key13":"value13"} +2 [null,null] ["CAILLEBOTIS",null] NULL +3 [null,42,null] NULL {"key11":"value11","key12":null,"key13":null} +PREHOOK: query: INSERT OVERWRITE TABLE parquet_array_null_element SELECT * FROM parquet_array_null_element_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_array_null_element_staging +PREHOOK: Output: default@parquet_array_null_element +POSTHOOK: query: INSERT OVERWRITE TABLE parquet_array_null_element SELECT * FROM
parquet_array_null_element_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_array_null_element_staging +POSTHOOK: Output: default@parquet_array_null_element +POSTHOOK: Lineage: parquet_array_null_element.id SIMPLE [(parquet_array_null_element_staging)parquet_array_null_element_staging.FieldSchema(name:id, type:int, comment:null), ] +POSTHOOK: Lineage: parquet_array_null_element.lstint SIMPLE [(parquet_array_null_element_staging)parquet_array_null_element_staging.FieldSchema(name:lstint, type:array<int>, comment:null), ] +POSTHOOK: Lineage: parquet_array_null_element.lststr SIMPLE [(parquet_array_null_element_staging)parquet_array_null_element_staging.FieldSchema(name:lststr, type:array<string>, comment:null), ] +POSTHOOK: Lineage: parquet_array_null_element.mp SIMPLE [(parquet_array_null_element_staging)parquet_array_null_element_staging.FieldSchema(name:mp, type:map<string,string>, comment:null), ] +PREHOOK: query: SELECT lstint from parquet_array_null_element +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_array_null_element +#### A masked pattern was here #### +POSTHOOK: query: SELECT lstint from parquet_array_null_element +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_array_null_element +#### A masked pattern was here #### +[null,7] +[null,null] +[null,42,null] +PREHOOK: query: SELECT lststr from parquet_array_null_element +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_array_null_element +#### A masked pattern was here #### +POSTHOOK: query: SELECT lststr from parquet_array_null_element +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_array_null_element +#### A masked pattern was here #### +["CARRELAGE","MOQUETTE"] +["CAILLEBOTIS",null] +NULL +PREHOOK: query: SELECT mp from parquet_array_null_element +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_array_null_element +#### A masked pattern was here #### +POSTHOOK: query: SELECT mp from parquet_array_null_element +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_array_null_element +#### A masked pattern was here #### +{"key12":"value12","key11":"value11","key13":"value13"} +NULL +{"key12":null,"key11":"value11","key13":null} +PREHOOK: query: SELECT * FROM parquet_array_null_element +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_array_null_element +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_array_null_element +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_array_null_element +#### A masked pattern was here #### +1 [null,7] ["CARRELAGE","MOQUETTE"] {"key12":"value12","key11":"value11","key13":"value13"} +2 [null,null] ["CAILLEBOTIS",null] NULL +3 [null,42,null] NULL {"key12":null,"key11":"value11","key13":null} diff --git a/ql/src/test/results/clientpositive/parquet_array_of_multi_field_struct.q.out b/ql/src/test/results/clientpositive/parquet_array_of_multi_field_struct.q.out new file mode 100644 index 0000000..61f38af --- /dev/null +++ b/ql/src/test/results/clientpositive/parquet_array_of_multi_field_struct.q.out @@ -0,0 +1,88 @@ +PREHOOK: query: -- this test creates a Parquet table with an array of multi-field structs + +CREATE TABLE parquet_array_of_multi_field_structs ( + locations ARRAY<STRUCT<latitude: DOUBLE, longitude: DOUBLE>> +) STORED AS PARQUET +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_array_of_multi_field_structs +POSTHOOK: query: -- this test creates a Parquet table with an array of multi-field structs + +CREATE TABLE parquet_array_of_multi_field_structs ( + locations ARRAY<STRUCT<latitude: DOUBLE, longitude: DOUBLE>> +) STORED AS PARQUET +POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_array_of_multi_field_structs +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/MultiFieldGroupInList.parquet' +OVERWRITE INTO TABLE parquet_array_of_multi_field_structs +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@parquet_array_of_multi_field_structs +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/MultiFieldGroupInList.parquet' +OVERWRITE INTO TABLE parquet_array_of_multi_field_structs +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@parquet_array_of_multi_field_structs +PREHOOK: query: SELECT * FROM parquet_array_of_multi_field_structs +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_array_of_multi_field_structs +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_array_of_multi_field_structs +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_array_of_multi_field_structs +#### A masked pattern was here #### +[{"latitude":0.0,"longitude":0.0},{"latitude":0.0,"longitude":180.0}] +PREHOOK: query: DROP TABLE parquet_array_of_multi_field_structs +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_array_of_multi_field_structs +PREHOOK: Output: default@parquet_array_of_multi_field_structs +POSTHOOK: query: DROP TABLE parquet_array_of_multi_field_structs +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_array_of_multi_field_structs +POSTHOOK: Output: default@parquet_array_of_multi_field_structs +PREHOOK: query: -- maps use the same writable structure, so validate that the data can be read +-- as a map instead of an array of structs + +CREATE TABLE parquet_map_view_of_multi_field_structs ( + locations MAP<DOUBLE, DOUBLE> +) STORED AS PARQUET +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_map_view_of_multi_field_structs +POSTHOOK: query: -- maps use the same writable structure, so validate that the data can be read +-- as a map instead of an array of structs + +CREATE TABLE parquet_map_view_of_multi_field_structs ( + locations MAP<DOUBLE, DOUBLE> +) STORED AS PARQUET +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_map_view_of_multi_field_structs +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/MultiFieldGroupInList.parquet' +OVERWRITE INTO TABLE parquet_map_view_of_multi_field_structs +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@parquet_map_view_of_multi_field_structs +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/MultiFieldGroupInList.parquet' +OVERWRITE INTO TABLE parquet_map_view_of_multi_field_structs +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@parquet_map_view_of_multi_field_structs +PREHOOK: query: SELECT * FROM parquet_map_view_of_multi_field_structs +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_map_view_of_multi_field_structs +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_map_view_of_multi_field_structs +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_map_view_of_multi_field_structs +#### A masked pattern was here #### +{0.0:180.0} +PREHOOK: query: DROP TABLE parquet_map_view_of_multi_field_structs +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_map_view_of_multi_field_structs +PREHOOK: Output: default@parquet_map_view_of_multi_field_structs +POSTHOOK: query: DROP TABLE parquet_map_view_of_multi_field_structs +POSTHOOK: type: DROPTABLE +POSTHOOK:
Input: default@parquet_map_view_of_multi_field_structs +POSTHOOK: Output: default@parquet_map_view_of_multi_field_structs diff --git a/ql/src/test/results/clientpositive/parquet_array_of_optional_elements.q.out b/ql/src/test/results/clientpositive/parquet_array_of_optional_elements.q.out new file mode 100644 index 0000000..5ccb1e7 --- /dev/null +++ b/ql/src/test/results/clientpositive/parquet_array_of_optional_elements.q.out @@ -0,0 +1,43 @@ +PREHOOK: query: -- this test creates a Parquet table with an array of optional structs + +CREATE TABLE parquet_array_of_optional_elements ( + locations ARRAY<STRUCT<latitude: DOUBLE, longitude: DOUBLE>> +) STORED AS PARQUET +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_array_of_optional_elements +POSTHOOK: query: -- this test creates a Parquet table with an array of optional structs + +CREATE TABLE parquet_array_of_optional_elements ( + locations ARRAY<STRUCT<latitude: DOUBLE, longitude: DOUBLE>> +) STORED AS PARQUET +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_array_of_optional_elements +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/NewOptionalGroupInList.parquet' +OVERWRITE INTO TABLE parquet_array_of_optional_elements +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@parquet_array_of_optional_elements +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/NewOptionalGroupInList.parquet' +OVERWRITE INTO TABLE parquet_array_of_optional_elements +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@parquet_array_of_optional_elements +PREHOOK: query: SELECT * FROM parquet_array_of_optional_elements +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_array_of_optional_elements +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_array_of_optional_elements +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_array_of_optional_elements +#### A masked pattern was here #### +[{"latitude":0.0,"longitude":0.0},null,{"latitude":0.0,"longitude":180.0}] +PREHOOK: query: DROP TABLE parquet_array_of_optional_elements +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_array_of_optional_elements +PREHOOK: Output: default@parquet_array_of_optional_elements +POSTHOOK: query: DROP TABLE parquet_array_of_optional_elements +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_array_of_optional_elements +POSTHOOK: Output: default@parquet_array_of_optional_elements diff --git a/ql/src/test/results/clientpositive/parquet_array_of_required_elements.q.out b/ql/src/test/results/clientpositive/parquet_array_of_required_elements.q.out new file mode 100644 index 0000000..18165e8 --- /dev/null +++ b/ql/src/test/results/clientpositive/parquet_array_of_required_elements.q.out @@ -0,0 +1,43 @@ +PREHOOK: query: -- this test creates a Parquet table with an array of structs + +CREATE TABLE parquet_array_of_required_elements ( + locations ARRAY<STRUCT<latitude: DOUBLE, longitude: DOUBLE>> +) STORED AS PARQUET +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_array_of_required_elements +POSTHOOK: query: -- this test creates a Parquet table with an array of structs + +CREATE TABLE parquet_array_of_required_elements ( + locations ARRAY<STRUCT<latitude: DOUBLE, longitude: DOUBLE>> +) STORED AS PARQUET +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_array_of_required_elements +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/NewRequiredGroupInList.parquet' +OVERWRITE INTO TABLE parquet_array_of_required_elements +PREHOOK: type:
LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@parquet_array_of_required_elements +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/NewRequiredGroupInList.parquet' +OVERWRITE INTO TABLE parquet_array_of_required_elements +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@parquet_array_of_required_elements +PREHOOK: query: SELECT * FROM parquet_array_of_required_elements +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_array_of_required_elements +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_array_of_required_elements +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_array_of_required_elements +#### A masked pattern was here #### +[{"latitude":0.0,"longitude":180.0},{"latitude":0.0,"longitude":0.0}] +PREHOOK: query: DROP TABLE parquet_array_of_required_elements +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_array_of_required_elements +PREHOOK: Output: default@parquet_array_of_required_elements +POSTHOOK: query: DROP TABLE parquet_array_of_required_elements +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_array_of_required_elements +POSTHOOK: Output: default@parquet_array_of_required_elements diff --git a/ql/src/test/results/clientpositive/parquet_array_of_single_field_struct.q.out b/ql/src/test/results/clientpositive/parquet_array_of_single_field_struct.q.out new file mode 100644 index 0000000..31c4c68 --- /dev/null +++ b/ql/src/test/results/clientpositive/parquet_array_of_single_field_struct.q.out @@ -0,0 +1,47 @@ +PREHOOK: query: -- this test creates a Parquet table with an array of single-field structs +-- that has an ambiguous Parquet schema that is assumed to be a list of bigints +-- This verifies compliance with the spec for this case. + +CREATE TABLE parquet_ambiguous_array_of_single_field_structs ( + single_element_groups ARRAY<BIGINT> +) STORED AS PARQUET +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_ambiguous_array_of_single_field_structs +POSTHOOK: query: -- this test creates a Parquet table with an array of single-field structs +-- that has an ambiguous Parquet schema that is assumed to be a list of bigints +-- This verifies compliance with the spec for this case.
+ +CREATE TABLE parquet_ambiguous_array_of_single_field_structs ( + single_element_groups ARRAY<BIGINT> +) STORED AS PARQUET +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_ambiguous_array_of_single_field_structs +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/SingleFieldGroupInList.parquet' +OVERWRITE INTO TABLE parquet_ambiguous_array_of_single_field_structs +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@parquet_ambiguous_array_of_single_field_structs +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/SingleFieldGroupInList.parquet' +OVERWRITE INTO TABLE parquet_ambiguous_array_of_single_field_structs +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@parquet_ambiguous_array_of_single_field_structs +PREHOOK: query: SELECT * FROM parquet_ambiguous_array_of_single_field_structs +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_ambiguous_array_of_single_field_structs +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_ambiguous_array_of_single_field_structs +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_ambiguous_array_of_single_field_structs +#### A masked pattern was here #### +[1234,2345] +PREHOOK: query: DROP TABLE parquet_ambiguous_array_of_single_field_structs +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_ambiguous_array_of_single_field_structs +PREHOOK: Output: default@parquet_ambiguous_array_of_single_field_structs +POSTHOOK: query: DROP TABLE parquet_ambiguous_array_of_single_field_structs +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_ambiguous_array_of_single_field_structs +POSTHOOK: Output: default@parquet_ambiguous_array_of_single_field_structs diff --git a/ql/src/test/results/clientpositive/parquet_array_of_structs.q.out b/ql/src/test/results/clientpositive/parquet_array_of_structs.q.out new file mode 100644 index 0000000..d3ac709 --- /dev/null +++ b/ql/src/test/results/clientpositive/parquet_array_of_structs.q.out @@ -0,0 +1,43 @@ +PREHOOK: query: -- this test creates a Parquet table with an array of structs + +CREATE TABLE parquet_array_of_structs ( + locations ARRAY<STRUCT<latitude: DOUBLE, longitude: DOUBLE>> +) STORED AS PARQUET +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_array_of_structs +POSTHOOK: query: -- this test creates a Parquet table with an array of structs + +CREATE TABLE parquet_array_of_structs ( + locations ARRAY<STRUCT<latitude: DOUBLE, longitude: DOUBLE>> +) STORED AS PARQUET +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_array_of_structs +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/HiveRequiredGroupInList.parquet' +OVERWRITE INTO TABLE parquet_array_of_structs +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@parquet_array_of_structs +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/HiveRequiredGroupInList.parquet' +OVERWRITE INTO TABLE parquet_array_of_structs +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@parquet_array_of_structs +PREHOOK: query: SELECT * FROM parquet_array_of_structs +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_array_of_structs +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_array_of_structs +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_array_of_structs +#### A masked pattern was here #### +[{"latitude":0.0,"longitude":180.0},{"latitude":0.0,"longitude":0.0}] +PREHOOK: query:
DROP TABLE parquet_array_of_structs +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_array_of_structs +PREHOOK: Output: default@parquet_array_of_structs +POSTHOOK: query: DROP TABLE parquet_array_of_structs +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_array_of_structs +POSTHOOK: Output: default@parquet_array_of_structs diff --git a/ql/src/test/results/clientpositive/parquet_array_of_unannotated_groups.q.out b/ql/src/test/results/clientpositive/parquet_array_of_unannotated_groups.q.out new file mode 100644 index 0000000..af116fa --- /dev/null +++ b/ql/src/test/results/clientpositive/parquet_array_of_unannotated_groups.q.out @@ -0,0 +1,45 @@ +PREHOOK: query: -- this test creates a Parquet table from a structure with an unannotated +-- repeated structure of (x,y) structs + +CREATE TABLE parquet_array_of_unannotated_groups ( + list_of_points ARRAY<STRUCT<x: DOUBLE, y: DOUBLE>> +) STORED AS PARQUET +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_array_of_unannotated_groups +POSTHOOK: query: -- this test creates a Parquet table from a structure with an unannotated +-- repeated structure of (x,y) structs + +CREATE TABLE parquet_array_of_unannotated_groups ( + list_of_points ARRAY<STRUCT<x: DOUBLE, y: DOUBLE>> +) STORED AS PARQUET +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_array_of_unannotated_groups +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/UnannotatedListOfGroups.parquet' +OVERWRITE INTO TABLE parquet_array_of_unannotated_groups +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@parquet_array_of_unannotated_groups +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/UnannotatedListOfGroups.parquet' +OVERWRITE INTO TABLE parquet_array_of_unannotated_groups +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@parquet_array_of_unannotated_groups +PREHOOK: query: SELECT * FROM parquet_array_of_unannotated_groups +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_array_of_unannotated_groups +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_array_of_unannotated_groups +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_array_of_unannotated_groups +#### A masked pattern was here #### +[{"x":1.0,"y":1.0},{"x":2.0,"y":2.0}] +PREHOOK: query: DROP TABLE parquet_array_of_unannotated_groups +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_array_of_unannotated_groups +PREHOOK: Output: default@parquet_array_of_unannotated_groups +POSTHOOK: query: DROP TABLE parquet_array_of_unannotated_groups +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_array_of_unannotated_groups +POSTHOOK: Output: default@parquet_array_of_unannotated_groups diff --git a/ql/src/test/results/clientpositive/parquet_array_of_unannotated_primitives.q.out b/ql/src/test/results/clientpositive/parquet_array_of_unannotated_primitives.q.out new file mode 100644 index 0000000..d2be684 --- /dev/null +++ b/ql/src/test/results/clientpositive/parquet_array_of_unannotated_primitives.q.out @@ -0,0 +1,45 @@ +PREHOOK: query: -- this test creates a Parquet table from a structure with an unannotated +-- repeated structure of int32s + +CREATE TABLE parquet_array_of_unannotated_ints ( + list_of_ints ARRAY<INT> +) STORED AS PARQUET +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_array_of_unannotated_ints +POSTHOOK: query: -- this test creates a Parquet table from a structure with an unannotated +--
repeated structure of int32s + +CREATE TABLE parquet_array_of_unannotated_ints ( + list_of_ints ARRAY<INT> +) STORED AS PARQUET +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_array_of_unannotated_ints +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/UnannotatedListOfPrimitives.parquet' +OVERWRITE INTO TABLE parquet_array_of_unannotated_ints +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@parquet_array_of_unannotated_ints +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/UnannotatedListOfPrimitives.parquet' +OVERWRITE INTO TABLE parquet_array_of_unannotated_ints +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@parquet_array_of_unannotated_ints +PREHOOK: query: SELECT * FROM parquet_array_of_unannotated_ints +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_array_of_unannotated_ints +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_array_of_unannotated_ints +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_array_of_unannotated_ints +#### A masked pattern was here #### +[34,35,36] +PREHOOK: query: DROP TABLE parquet_array_of_unannotated_ints +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_array_of_unannotated_ints +PREHOOK: Output: default@parquet_array_of_unannotated_ints +POSTHOOK: query: DROP TABLE parquet_array_of_unannotated_ints +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_array_of_unannotated_ints +POSTHOOK: Output: default@parquet_array_of_unannotated_ints diff --git a/ql/src/test/results/clientpositive/parquet_avro_array_of_primitives.q.out b/ql/src/test/results/clientpositive/parquet_avro_array_of_primitives.q.out new file mode 100644 index 0000000..0776853 --- /dev/null +++ b/ql/src/test/results/clientpositive/parquet_avro_array_of_primitives.q.out @@ -0,0 +1,43 @@ +PREHOOK: query: -- this test creates a Parquet table with an array of structs + +CREATE TABLE parquet_avro_array_of_primitives ( + list_of_ints ARRAY<INT> +) STORED AS PARQUET +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_avro_array_of_primitives +POSTHOOK: query: -- this test creates a Parquet table with an array of structs + +CREATE TABLE parquet_avro_array_of_primitives ( + list_of_ints ARRAY<INT> +) STORED AS PARQUET +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_avro_array_of_primitives +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/AvroPrimitiveInList.parquet' +OVERWRITE INTO TABLE parquet_avro_array_of_primitives +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@parquet_avro_array_of_primitives +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/AvroPrimitiveInList.parquet' +OVERWRITE INTO TABLE parquet_avro_array_of_primitives +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@parquet_avro_array_of_primitives +PREHOOK: query: SELECT * FROM parquet_avro_array_of_primitives +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_avro_array_of_primitives +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_avro_array_of_primitives +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_avro_array_of_primitives +#### A masked pattern was here #### +[34,35,36] +PREHOOK: query: DROP TABLE parquet_avro_array_of_primitives +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_avro_array_of_primitives +PREHOOK:
Output: default@parquet_avro_array_of_primitives +POSTHOOK: query: DROP TABLE parquet_avro_array_of_primitives +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_avro_array_of_primitives +POSTHOOK: Output: default@parquet_avro_array_of_primitives diff --git a/ql/src/test/results/clientpositive/parquet_avro_array_of_single_field_struct.q.out b/ql/src/test/results/clientpositive/parquet_avro_array_of_single_field_struct.q.out new file mode 100644 index 0000000..cb460b5 --- /dev/null +++ b/ql/src/test/results/clientpositive/parquet_avro_array_of_single_field_struct.q.out @@ -0,0 +1,45 @@ +PREHOOK: query: -- this test creates a Parquet table with an array of single-field structs +-- as written by parquet-avro + +CREATE TABLE parquet_avro_array_of_single_field_structs ( + single_element_groups ARRAY<STRUCT<count: BIGINT>> +) STORED AS PARQUET +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_avro_array_of_single_field_structs +POSTHOOK: query: -- this test creates a Parquet table with an array of single-field structs +-- as written by parquet-avro + +CREATE TABLE parquet_avro_array_of_single_field_structs ( + single_element_groups ARRAY<STRUCT<count: BIGINT>> +) STORED AS PARQUET +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_avro_array_of_single_field_structs +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/AvroSingleFieldGroupInList.parquet' +OVERWRITE INTO TABLE parquet_avro_array_of_single_field_structs +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@parquet_avro_array_of_single_field_structs +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/AvroSingleFieldGroupInList.parquet' +OVERWRITE INTO TABLE parquet_avro_array_of_single_field_structs +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@parquet_avro_array_of_single_field_structs +PREHOOK: query: SELECT * FROM parquet_avro_array_of_single_field_structs +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_avro_array_of_single_field_structs +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_avro_array_of_single_field_structs +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_avro_array_of_single_field_structs +#### A masked pattern was here #### +[{"count":1234},{"count":2345}] +PREHOOK: query: DROP TABLE parquet_avro_array_of_single_field_structs +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_avro_array_of_single_field_structs +PREHOOK: Output: default@parquet_avro_array_of_single_field_structs +POSTHOOK: query: DROP TABLE parquet_avro_array_of_single_field_structs +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_avro_array_of_single_field_structs +POSTHOOK: Output: default@parquet_avro_array_of_single_field_structs diff --git a/ql/src/test/results/clientpositive/parquet_join.q.out b/ql/src/test/results/clientpositive/parquet_join.q.out index 15524e5..d288fa2 100644 --- a/ql/src/test/results/clientpositive/parquet_join.q.out +++ b/ql/src/test/results/clientpositive/parquet_join.q.out @@ -73,28 +73,28 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: p2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + alias: p1 + Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2 Basic stats:
COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - value expressions: myvalue (type: string) + Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE TableScan - alias: p1 - Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: NONE + alias: p2 + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + value expressions: myvalue (type: string) Reduce Operator Tree: Join Operator condition map: diff --git a/ql/src/test/results/clientpositive/parquet_map_null.q.out b/ql/src/test/results/clientpositive/parquet_map_null.q.out new file mode 100644 index 0000000..75c6d08 --- /dev/null +++ b/ql/src/test/results/clientpositive/parquet_map_null.q.out @@ -0,0 +1,67 @@ +PREHOOK: query: -- This test attempts to write a parquet table from an avro table that contains map null values + +DROP TABLE IF EXISTS avro_table +PREHOOK: type: DROPTABLE +POSTHOOK: query: -- This test attempts to write a parquet table from an avro table that contains map null values + +DROP TABLE IF EXISTS avro_table +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS parquet_table +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS parquet_table +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE avro_table (avreau_col_1 map<string,string>) STORED AS AVRO +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@avro_table +POSTHOOK: query: CREATE TABLE avro_table (avreau_col_1 map<string,string>) STORED AS AVRO +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@avro_table +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/map_null_val.avro' OVERWRITE INTO TABLE avro_table +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@avro_table +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/map_null_val.avro' OVERWRITE INTO TABLE avro_table +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@avro_table +PREHOOK: query: CREATE TABLE parquet_table STORED AS PARQUET AS SELECT * FROM avro_table +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@avro_table +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_table +POSTHOOK: query: CREATE TABLE parquet_table STORED AS PARQUET AS SELECT * FROM avro_table +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@avro_table +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_table +PREHOOK: query: SELECT * FROM parquet_table +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_table +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_table +#### A masked pattern was here #### +{"key3":"val3","key4":null} +{"key3":"val3","key4":null} +{"key1":null,"key2":"val2"}
+{"key3":"val3","key4":null} +{"key3":"val3","key4":null} +PREHOOK: query: DROP TABLE avro_table +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@avro_table +PREHOOK: Output: default@avro_table +POSTHOOK: query: DROP TABLE avro_table +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@avro_table +POSTHOOK: Output: default@avro_table +PREHOOK: query: DROP TABLE parquet_table +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_table +PREHOOK: Output: default@parquet_table +POSTHOOK: query: DROP TABLE parquet_table +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_table +POSTHOOK: Output: default@parquet_table diff --git a/ql/src/test/results/clientpositive/parquet_map_of_maps.q.out b/ql/src/test/results/clientpositive/parquet_map_of_maps.q.out new file mode 100644 index 0000000..190132a --- /dev/null +++ b/ql/src/test/results/clientpositive/parquet_map_of_maps.q.out @@ -0,0 +1,61 @@ +PREHOOK: query: -- this test reads and writes a parquet file with a map of maps + +CREATE TABLE parquet_map_of_maps ( + map_of_maps MAP<STRING, MAP<STRING, INT>> +) STORED AS PARQUET +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_map_of_maps +POSTHOOK: query: -- this test reads and writes a parquet file with a map of maps + +CREATE TABLE parquet_map_of_maps ( + map_of_maps MAP<STRING, MAP<STRING, INT>> +) STORED AS PARQUET +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_map_of_maps +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/NestedMap.parquet' +OVERWRITE INTO TABLE parquet_map_of_maps +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@parquet_map_of_maps +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/NestedMap.parquet' +OVERWRITE INTO TABLE parquet_map_of_maps +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@parquet_map_of_maps +PREHOOK: query: CREATE TABLE parquet_map_of_maps_copy STORED AS PARQUET AS SELECT * FROM parquet_map_of_maps +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@parquet_map_of_maps +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_map_of_maps_copy +POSTHOOK: query: CREATE TABLE parquet_map_of_maps_copy STORED AS PARQUET AS SELECT * FROM parquet_map_of_maps +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@parquet_map_of_maps +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_map_of_maps_copy +PREHOOK: query: SELECT * FROM parquet_map_of_maps_copy +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_map_of_maps_copy +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_map_of_maps_copy +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_map_of_maps_copy +#### A masked pattern was here #### +{"b":{"b":-2,"a":-1},"a":{"b":1}} +PREHOOK: query: DROP TABLE parquet_map_of_maps +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_map_of_maps +PREHOOK: Output: default@parquet_map_of_maps +POSTHOOK: query: DROP TABLE parquet_map_of_maps +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_map_of_maps +POSTHOOK: Output: default@parquet_map_of_maps +PREHOOK: query: DROP TABLE parquet_map_of_maps_copy +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_map_of_maps_copy +PREHOOK: Output: default@parquet_map_of_maps_copy +POSTHOOK: query: DROP TABLE parquet_map_of_maps_copy +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_map_of_maps_copy +POSTHOOK: Output: default@parquet_map_of_maps_copy diff --git
a/ql/src/test/results/clientpositive/parquet_nested_complex.q.out b/ql/src/test/results/clientpositive/parquet_nested_complex.q.out new file mode 100644 index 0000000..5e10e96 --- /dev/null +++ b/ql/src/test/results/clientpositive/parquet_nested_complex.q.out @@ -0,0 +1,106 @@ +PREHOOK: query: -- start with the original nestedcomplex test + +create table nestedcomplex ( +simple_int int, +max_nested_array array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<int>>>>>>>>>>>>>>>>>>>>>>>, +max_nested_map array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<map<string,string>>>>>>>>>>>>>>>>>>>>>>, +max_nested_struct array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<struct<s:string, i:bigint>>>>>>>>>>>>>>>>>>>>>>>, +simple_string string) +ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +WITH SERDEPROPERTIES ( + 'hive.serialization.extend.nesting.levels'='true', + 'line.delim'='\n' +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@nestedcomplex +POSTHOOK: query: -- start with the original nestedcomplex test + +create table nestedcomplex ( +simple_int int, +max_nested_array array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<int>>>>>>>>>>>>>>>>>>>>>>>, +max_nested_map array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<map<string,string>>>>>>>>>>>>>>>>>>>>>>, +max_nested_struct array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<struct<s:string, i:bigint>>>>>>>>>>>>>>>>>>>>>>>, +simple_string string) +ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +WITH SERDEPROPERTIES ( + 'hive.serialization.extend.nesting.levels'='true', + 'line.delim'='\n' +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@nestedcomplex +PREHOOK: query: describe nestedcomplex +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@nestedcomplex +POSTHOOK: query: describe nestedcomplex +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@nestedcomplex +simple_int int +max_nested_array array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<int>>>>>>>>>>>>>>>>>>>>>>> +max_nested_map array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<map<string,string>>>>>>>>>>>>>>>>>>>>>> +max_nested_struct array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<struct<s:string,i:bigint>>>>>>>>>>>>>>>>>>>>>>> +simple_string string +PREHOOK: query: describe extended nestedcomplex +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@nestedcomplex +POSTHOOK: query: describe extended nestedcomplex +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@nestedcomplex +simple_int int +max_nested_array array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<int>>>>>>>>>>>>>>>>>>>>>>> +max_nested_map array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<map<string,string>>>>>>>>>>>>>>>>>>>>>> +max_nested_struct array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<array<struct<s:string,i:bigint>>>>>>>>>>>>>>>>>>>>>>> +simple_string string + +#### A masked pattern was here #### +PREHOOK: query: load data local inpath '../../data/files/nested_complex.txt' overwrite into table nestedcomplex +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@nestedcomplex +POSTHOOK: query: load data local inpath '../../data/files/nested_complex.txt' overwrite into table nestedcomplex +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@nestedcomplex +PREHOOK: query: -- and load the table into Parquet + +CREATE TABLE parquet_nested_complex STORED AS PARQUET AS SELECT * FROM nestedcomplex +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@nestedcomplex +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_nested_complex +POSTHOOK: query: -- and load the table into Parquet + +CREATE TABLE parquet_nested_complex STORED AS PARQUET AS SELECT * FROM nestedcomplex +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@nestedcomplex +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_nested_complex +PREHOOK: query: SELECT * FROM parquet_nested_complex SORT BY simple_int +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_nested_complex +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_nested_complex SORT BY simple_int +POSTHOOK: type: QUERY +POSTHOOK: Input:
default@parquet_nested_complex +#### A masked pattern was here #### +2 [[[[[[[[[[[[[[[[[[[[[[[0,3,2]]]]]]]]]]]]]]]]]]]]]]] [[[[[[[[[[[[[[[[[[[[[{"k3":"v3","k1":"v1"}]]]]]]]]]]]]]]]]]]]]] [[[[[[[[[[[[[[[[[[[[[[{"s":"b","i":10}]]]]]]]]]]]]]]]]]]]]]] 2 +3 [[[[[[[[[[[[[[[[[[[[[[[0,1,2]]]]]]]]]]]]]]]]]]]]]]] [[[[[[[[[[[[[[[[[[[[[{"k2":"v2","k1":"v1"}]]]]]]]]]]]]]]]]]]]]] [[[[[[[[[[[[[[[[[[[[[[{"s":"a","i":10}]]]]]]]]]]]]]]]]]]]]]] 2 +PREHOOK: query: DROP TABLE nestedcomplex +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@nestedcomplex +PREHOOK: Output: default@nestedcomplex +POSTHOOK: query: DROP TABLE nestedcomplex +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@nestedcomplex +POSTHOOK: Output: default@nestedcomplex +PREHOOK: query: DROP TABLE parquet_nested_complex +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_nested_complex +PREHOOK: Output: default@parquet_nested_complex +POSTHOOK: query: DROP TABLE parquet_nested_complex +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_nested_complex +POSTHOOK: Output: default@parquet_nested_complex diff --git a/ql/src/test/results/clientpositive/parquet_thrift_array_of_primitives.q.out b/ql/src/test/results/clientpositive/parquet_thrift_array_of_primitives.q.out new file mode 100644 index 0000000..f2b53c5 --- /dev/null +++ b/ql/src/test/results/clientpositive/parquet_thrift_array_of_primitives.q.out @@ -0,0 +1,43 @@ +PREHOOK: query: -- this test creates a Parquet table with an array of structs + +CREATE TABLE parquet_thrift_array_of_primitives ( + list_of_ints ARRAY<INT> +) STORED AS PARQUET +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_thrift_array_of_primitives +POSTHOOK: query: -- this test creates a Parquet table with an array of structs + +CREATE TABLE parquet_thrift_array_of_primitives ( + list_of_ints ARRAY<INT> +) STORED AS PARQUET +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_thrift_array_of_primitives +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/ThriftPrimitiveInList.parquet' +OVERWRITE INTO TABLE parquet_thrift_array_of_primitives +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@parquet_thrift_array_of_primitives +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/ThriftPrimitiveInList.parquet' +OVERWRITE INTO TABLE parquet_thrift_array_of_primitives +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@parquet_thrift_array_of_primitives +PREHOOK: query: SELECT * FROM parquet_thrift_array_of_primitives +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_thrift_array_of_primitives +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_thrift_array_of_primitives +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_thrift_array_of_primitives +#### A masked pattern was here #### +[34,35,36] +PREHOOK: query: DROP TABLE parquet_thrift_array_of_primitives +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_thrift_array_of_primitives +PREHOOK: Output: default@parquet_thrift_array_of_primitives +POSTHOOK: query: DROP TABLE parquet_thrift_array_of_primitives +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_thrift_array_of_primitives +POSTHOOK: Output: default@parquet_thrift_array_of_primitives diff --git a/ql/src/test/results/clientpositive/parquet_thrift_array_of_single_field_struct.q.out b/ql/src/test/results/clientpositive/parquet_thrift_array_of_single_field_struct.q.out new file mode 100644
index 0000000..46473ab --- /dev/null +++ b/ql/src/test/results/clientpositive/parquet_thrift_array_of_single_field_struct.q.out @@ -0,0 +1,45 @@ +PREHOOK: query: -- this test creates a Parquet table with an array of single-field structs +-- as written by parquet-thrift + +CREATE TABLE parquet_thrift_array_of_single_field_structs ( + single_element_groups ARRAY<STRUCT<count: BIGINT>> +) STORED AS PARQUET +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_thrift_array_of_single_field_structs +POSTHOOK: query: -- this test creates a Parquet table with an array of single-field structs +-- as written by parquet-thrift + +CREATE TABLE parquet_thrift_array_of_single_field_structs ( + single_element_groups ARRAY<STRUCT<count: BIGINT>> +) STORED AS PARQUET +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_thrift_array_of_single_field_structs +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/ThriftSingleFieldGroupInList.parquet' +OVERWRITE INTO TABLE parquet_thrift_array_of_single_field_structs +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@parquet_thrift_array_of_single_field_structs +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/ThriftSingleFieldGroupInList.parquet' +OVERWRITE INTO TABLE parquet_thrift_array_of_single_field_structs +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@parquet_thrift_array_of_single_field_structs +PREHOOK: query: SELECT * FROM parquet_thrift_array_of_single_field_structs +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_thrift_array_of_single_field_structs +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_thrift_array_of_single_field_structs +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_thrift_array_of_single_field_structs +#### A masked pattern was here #### +[{"count":1234},{"count":2345}] +PREHOOK: query: DROP TABLE parquet_thrift_array_of_single_field_structs +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@parquet_thrift_array_of_single_field_structs +PREHOOK: Output: default@parquet_thrift_array_of_single_field_structs +POSTHOOK: query: DROP TABLE parquet_thrift_array_of_single_field_structs +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@parquet_thrift_array_of_single_field_structs +POSTHOOK: Output: default@parquet_thrift_array_of_single_field_structs diff --git a/ql/src/test/results/clientpositive/pcr.q.out b/ql/src/test/results/clientpositive/pcr.q.out index 568417a..07de2a1 100644 --- a/ql/src/test/results/clientpositive/pcr.q.out +++ b/ql/src/test/results/clientpositive/pcr.q.out @@ -2695,7 +2695,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: t2 + alias: t1 Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator @@ -2707,11 +2707,11 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: int) Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE - tag: 1 + tag: 0 value expressions: value (type: string) auto parallelism: false TableScan - alias: t1 + alias: t2 Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator @@ -2723,7 +2723,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: int) Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE - tag: 0 + tag: 1 value expressions: value (type: string) auto parallelism: false Path -> Alias: @@
-2775,7 +2775,7 @@ STAGE PLANS: name: default.pcr_t1 name: default.pcr_t1 Truncated Path -> Alias: - /pcr_t1/ds=2000-04-08 [t2, t1] + /pcr_t1/ds=2000-04-08 [t1, t2] Needs Tagging: true Reduce Operator Tree: Join Operator @@ -2992,7 +2992,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: t2 + alias: t1 Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator @@ -3004,11 +3004,11 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: int) Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE - tag: 1 + tag: 0 value expressions: value (type: string) auto parallelism: false TableScan - alias: t1 + alias: t2 Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator @@ -3020,7 +3020,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: int) Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE - tag: 0 + tag: 1 value expressions: value (type: string) auto parallelism: false Path -> Alias: diff --git a/ql/src/test/results/clientpositive/ppd2.q.out b/ql/src/test/results/clientpositive/ppd2.q.out index 618a545..d22be6d 100644 --- a/ql/src/test/results/clientpositive/ppd2.q.out +++ b/ql/src/test/results/clientpositive/ppd2.q.out @@ -64,16 +64,12 @@ STAGE PLANS: Filter Operator predicate: (_col1 > 1) (type: boolean) Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -347,7 +343,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: y + alias: x Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key = 20) (type: boolean) @@ -356,8 +352,9 @@ STAGE PLANS: key expressions: '20' (type: string) sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) TableScan - alias: x + alias: y Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key = 20) (type: boolean) @@ -366,7 +363,6 @@ STAGE PLANS: key expressions: '20' (type: string) sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -376,16 +372,12 @@ STAGE PLANS: 1 outputColumnNames: _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -477,17 +469,13 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -497,22 +485,22 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: bigint) sort order: +- Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col1 > 1) (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git a/ql/src/test/results/clientpositive/ppd_clusterby.q.out b/ql/src/test/results/clientpositive/ppd_clusterby.q.out index e3643f1..c727262 100644 --- a/ql/src/test/results/clientpositive/ppd_clusterby.q.out +++ b/ql/src/test/results/clientpositive/ppd_clusterby.q.out @@ -71,7 +71,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: y + alias: x Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key = 20) (type: boolean) @@ -80,8 +80,9 @@ STAGE PLANS: key expressions: '20' 
(type: string) sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) TableScan - alias: x + alias: y Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key = 20) (type: boolean) @@ -90,7 +91,6 @@ STAGE PLANS: key expressions: '20' (type: string) sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -100,16 +100,12 @@ STAGE PLANS: 1 outputColumnNames: _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -221,7 +217,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: y + alias: x Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key = 20) (type: boolean) @@ -230,8 +226,9 @@ STAGE PLANS: key expressions: '20' (type: string) sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) TableScan - alias: x + alias: y Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key = 20) (type: boolean) @@ -240,7 +237,6 @@ STAGE PLANS: key expressions: '20' (type: string) sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -250,16 +246,12 @@ STAGE PLANS: 1 outputColumnNames: _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce diff --git a/ql/src/test/results/clientpositive/ppd_gby.q.out b/ql/src/test/results/clientpositive/ppd_gby.q.out index 3a6543b..1d7731f 100644 --- a/ql/src/test/results/clientpositive/ppd_gby.q.out +++ b/ql/src/test/results/clientpositive/ppd_gby.q.out @@ -47,24 +47,20 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 27 Data size: 286 Basic stats: 
COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col0 > 'val_200') and ((_col1 > 30) or (_col0 < 'val_400'))) (type: boolean) + Filter Operator + predicate: ((_col0 > 'val_200') and ((_col1 > 30) or (_col0 < 'val_400'))) (type: boolean) + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + File Output Operator + compressed: false Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/ppd_gby2.q.out b/ql/src/test/results/clientpositive/ppd_gby2.q.out index fd3866b..ec4c801 100644 --- a/ql/src/test/results/clientpositive/ppd_gby2.q.out +++ b/ql/src/test/results/clientpositive/ppd_gby2.q.out @@ -50,29 +50,21 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col0 > 'val_200') and ((_col1 > 30) or (_col0 < 'val_400'))) (type: boolean) + Filter Operator + predicate: ((_col0 > 'val_200') and ((_col1 > 30) or (_col0 < 'val_400'))) (type: boolean) + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: max(_col0) + keys: _col1 (type: bigint) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: max(_col0) - keys: _col1 (type: bigint) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -185,22 +177,18 @@ STAGE PLANS: Filter Operator 
predicate: ((_col1 > 30) or (_col0 < 'val_400')) (type: boolean) Statistics: Num rows: 18 Data size: 190 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) + Group By Operator + aggregations: max(_col0) + keys: _col1 (type: bigint) + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 18 Data size: 190 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: max(_col0) - keys: _col1 (type: bigint) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 18 Data size: 190 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce diff --git a/ql/src/test/results/clientpositive/ppd_gby_join.q.out b/ql/src/test/results/clientpositive/ppd_gby_join.q.out index a572726..deb9f69 100644 --- a/ql/src/test/results/clientpositive/ppd_gby_join.q.out +++ b/ql/src/test/results/clientpositive/ppd_gby_join.q.out @@ -31,39 +31,39 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((key > '2') and (key < '400')) and (key <> '4')) and (key > '20')) (type: boolean) - Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE + predicate: (((((key > '1') and (key < '400')) and (key > '20')) and ((value < 'val_50') or (key > '2'))) and (key <> '4')) (type: boolean) + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col0 < '400') (type: boolean) - Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 42 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 42 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((((key > '1') and (key < '400')) and (key > '20')) and ((value < 'val_50') or (key > '2'))) and (key <> '4')) (type: boolean) - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + predicate: ((((key > '2') and (key < '400')) and (key <> '4')) and (key > '20')) (type: boolean) + Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 
Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col0 < '400') (type: boolean) - Statistics: Num rows: 4 Data size: 42 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 4 Data size: 42 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -161,33 +161,33 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((key > '2') and (key < '400')) and (key <> '4')) and (key > '20')) (type: boolean) - Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE + predicate: (((((key > '1') and (key < '400')) and (key > '20')) and ((value < 'val_50') or (key > '2'))) and (key <> '4')) (type: boolean) + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((((key > '1') and (key < '400')) and (key > '20')) and ((value < 'val_50') or (key > '2'))) and (key <> '4')) (type: boolean) - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + predicate: ((((key > '2') and (key < '400')) and (key <> '4')) and (key > '20')) (type: boolean) + Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: diff --git a/ql/src/test/results/clientpositive/ppd_join.q.out b/ql/src/test/results/clientpositive/ppd_join.q.out index 7071fc7..59f0fd3 100644 --- a/ql/src/test/results/clientpositive/ppd_join.q.out +++ b/ql/src/test/results/clientpositive/ppd_join.q.out @@ -32,39 +32,39 @@ STAGE PLANS: alias: src 
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((key > '2') and (key < '400')) and (key <> '4')) and (key > '20')) (type: boolean) - Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE + predicate: (((((key > '1') and (key < '400')) and (key > '20')) and ((value < 'val_50') or (key > '2'))) and (key <> '4')) (type: boolean) + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col0 < '400') (type: boolean) - Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 42 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 42 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((((key > '1') and (key < '400')) and (key > '20')) and ((value < 'val_50') or (key > '2'))) and (key <> '4')) (type: boolean) - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + predicate: ((((key > '2') and (key < '400')) and (key <> '4')) and (key > '20')) (type: boolean) + Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col0 < '400') (type: boolean) - Statistics: Num rows: 4 Data size: 42 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 4 Data size: 42 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator @@ -564,33 +564,33 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((key > '2') and (key < '400')) and (key <> '4')) and (key > '20')) (type: boolean) - Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE + predicate: (((((key > '1') and (key < '400')) and (key > '20')) and ((value < 'val_50') or (key > '2'))) and (key <> '4')) (type: boolean) + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key 
expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((((key > '1') and (key < '400')) and (key > '20')) and ((value < 'val_50') or (key > '2'))) and (key <> '4')) (type: boolean) - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + predicate: ((((key > '2') and (key < '400')) and (key <> '4')) and (key > '20')) (type: boolean) + Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator diff --git a/ql/src/test/results/clientpositive/ppd_join2.q.out b/ql/src/test/results/clientpositive/ppd_join2.q.out index 10ffe4e..fd06f65 100644 --- a/ql/src/test/results/clientpositive/ppd_join2.q.out +++ b/ql/src/test/results/clientpositive/ppd_join2.q.out @@ -39,39 +39,39 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((key <> '305') and (key < '400')) and (key <> '14')) and (key <> '311')) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + predicate: ((((((key <> '302') and (key < '400')) and value is not null) and (key <> '311')) and ((value <> 'val_50') or (key > '1'))) and (key <> '14')) (type: boolean) + Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col0 < '400') (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((((key <> '302') and (key < '400')) and value is not null) and (key <> '311')) and ((value <> 'val_50') or (key > '1'))) and (key <> '14')) (type: boolean) - Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE + predicate: ((((key <> '305') 
and (key < '400')) and (key <> '14')) and (key <> '311')) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col0 < '400') (type: boolean) - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator @@ -1732,33 +1732,33 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((key <> '305') and (key < '400')) and (key <> '14')) and (key <> '311')) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + predicate: ((((((key <> '302') and (key < '400')) and value is not null) and (key <> '311')) and ((value <> 'val_50') or (key > '1'))) and (key <> '14')) (type: boolean) + Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((((key <> '302') and (key < '400')) and value is not null) and (key <> '311')) and ((value <> 'val_50') or (key > '1'))) and (key <> '14')) (type: boolean) - Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE + predicate: ((((key <> '305') and (key < '400')) and (key <> '14')) and (key <> '311')) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator diff --git a/ql/src/test/results/clientpositive/ppd_join3.q.out b/ql/src/test/results/clientpositive/ppd_join3.q.out 
index 2c77c8d..9706852 100644 --- a/ql/src/test/results/clientpositive/ppd_join3.q.out +++ b/ql/src/test/results/clientpositive/ppd_join3.q.out @@ -38,39 +38,39 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((((key <> '12') and (key < '400')) and (key <> '4')) and (key > '0')) and (key <> '1')) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + predicate: ((((((key <> '11') and (key < '400')) and (key > '0')) and ((value <> 'val_500') or (key > '1'))) and (key <> '4')) and (key <> '1')) (type: boolean) + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col0 < '400') (type: boolean) - Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((((key <> '11') and (key < '400')) and (key > '0')) and ((value <> 'val_500') or (key > '1'))) and (key <> '4')) and (key <> '1')) (type: boolean) - Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE + predicate: (((((key <> '12') and (key < '400')) and (key <> '4')) and (key > '0')) and (key <> '1')) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col0 < '400') (type: boolean) - Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) TableScan alias: src @@ -1759,33 +1759,33 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((((key <> '12') and (key < '400')) and (key <> '4')) and (key > '0')) and (key <> '1')) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + predicate: ((((((key <> '11') and (key < '400')) and (key > '0')) and ((value <> 'val_500') or (key > '1'))) and (key <> '4')) and (key <> '1')) (type: boolean) + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE 
Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((((key <> '11') and (key < '400')) and (key > '0')) and ((value <> 'val_500') or (key > '1'))) and (key <> '4')) and (key <> '1')) (type: boolean) - Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE + predicate: (((((key <> '12') and (key < '400')) and (key <> '4')) and (key > '0')) and (key <> '1')) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) TableScan alias: src diff --git a/ql/src/test/results/clientpositive/ppd_join4.q.out b/ql/src/test/results/clientpositive/ppd_join4.q.out index de7923a..def2460 100644 --- a/ql/src/test/results/clientpositive/ppd_join4.q.out +++ b/ql/src/test/results/clientpositive/ppd_join4.q.out @@ -45,12 +45,12 @@ join test_tbl t3 on (t2.id=t3.id ) where t2.name='c' and t3.id='a' POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -66,19 +66,20 @@ STAGE PLANS: sort order: + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Operator Tree: - Select Operator + Reduce Output Operator + key expressions: 'a' (type: string) + sort order: + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan + Reduce Output Operator + key expressions: 'a' (type: string) + sort order: + + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + TableScan alias: t3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator @@ -88,11 +89,6 @@ STAGE PLANS: key expressions: 'a' (type: string) sort order: + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE 
- TableScan - Reduce Output Operator - key expressions: 'a' (type: string) - sort order: + - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Operator Tree: Join Operator condition map: diff --git a/ql/src/test/results/clientpositive/ppd_join5.q.out b/ql/src/test/results/clientpositive/ppd_join5.q.out index 16861c4..23d5046 100644 --- a/ql/src/test/results/clientpositive/ppd_join5.q.out +++ b/ql/src/test/results/clientpositive/ppd_join5.q.out @@ -53,6 +53,17 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (id1 is not null and id2 is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: id1 (type: string), id2 (type: string) + sort order: ++ + Map-reduce partition columns: id1 (type: string), id2 (type: string) + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + TableScan alias: b Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -64,17 +75,6 @@ STAGE PLANS: Map-reduce partition columns: id (type: string), id (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: d (type: int) - TableScan - alias: a - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (id1 is not null and id2 is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: id1 (type: string), id2 (type: string) - sort order: ++ - Map-reduce partition columns: id1 (type: string), id2 (type: string) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: diff --git a/ql/src/test/results/clientpositive/ppd_join_filter.q.out b/ql/src/test/results/clientpositive/ppd_join_filter.q.out index 15a8a54..09aec4c 100644 --- a/ql/src/test/results/clientpositive/ppd_join_filter.q.out +++ b/ql/src/test/results/clientpositive/ppd_join_filter.q.out @@ -120,12 +120,12 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -136,24 +136,20 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Group By Operator + aggregations: min(key) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(key) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic 
stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string) - auto parallelism: false + tag: -1 + value expressions: _col1 (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -237,10 +233,20 @@ STAGE PLANS: GatherStats: false MultiFileSpray: false - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan + GatherStats: false + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 21 Data size: 223 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col3 (type: double), _col4 (type: double) + auto parallelism: false + TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false @@ -255,16 +261,6 @@ STAGE PLANS: Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE tag: 0 auto parallelism: false - TableScan - GatherStats: false - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 21 Data size: 223 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: _col3 (type: double), _col4 (type: double) - auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -533,12 +529,12 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -549,24 +545,20 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Group By Operator + aggregations: min(key) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(key) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string) - auto parallelism: false + tag: -1 + value expressions: _col1 (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -650,10 +642,20 @@ STAGE PLANS: GatherStats: false MultiFileSpray: false - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan + GatherStats: false + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col3 (type: double), _col4 (type: double) + auto parallelism: false + TableScan alias: a Statistics: Num rows: 500 
Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false @@ -668,16 +670,6 @@ STAGE PLANS: Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE tag: 0 auto parallelism: false - TableScan - GatherStats: false - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: _col3 (type: double), _col4 (type: double) - auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -946,12 +938,12 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -1059,10 +1051,20 @@ STAGE PLANS: GatherStats: false MultiFileSpray: false - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan + GatherStats: false + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 42 Data size: 446 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col3 (type: double), _col4 (type: double) + auto parallelism: false + TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false @@ -1077,16 +1079,6 @@ STAGE PLANS: Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE tag: 0 auto parallelism: false - TableScan - GatherStats: false - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 42 Data size: 446 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: _col3 (type: double), _col4 (type: double) - auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1355,12 +1347,12 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -1371,24 +1363,20 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Group By Operator + aggregations: min(key) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(key) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: 
_col1 (type: string) - auto parallelism: false + tag: -1 + value expressions: _col1 (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1472,10 +1460,20 @@ STAGE PLANS: GatherStats: false MultiFileSpray: false - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan + GatherStats: false + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col3 (type: double), _col4 (type: double) + auto parallelism: false + TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false @@ -1490,16 +1488,6 @@ STAGE PLANS: Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE tag: 0 auto parallelism: false - TableScan - GatherStats: false - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: _col3 (type: double), _col4 (type: double) - auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: diff --git a/ql/src/test/results/clientpositive/ppd_multi_insert.q.out b/ql/src/test/results/clientpositive/ppd_multi_insert.q.out index b04fd0c..7d3f200 100644 --- a/ql/src/test/results/clientpositive/ppd_multi_insert.q.out +++ b/ql/src/test/results/clientpositive/ppd_multi_insert.q.out @@ -55,7 +55,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -65,8 +65,9 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -76,7 +77,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1304,7 +1304,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -1314,8 +1314,9 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -1325,7 +1326,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) Reduce Operator Tree: Join Operator condition map: diff --git a/ql/src/test/results/clientpositive/ppd_outer_join1.q.out 
b/ql/src/test/results/clientpositive/ppd_outer_join1.q.out index f0163e0..d2e542f 100644 --- a/ql/src/test/results/clientpositive/ppd_outer_join1.q.out +++ b/ql/src/test/results/clientpositive/ppd_outer_join1.q.out @@ -29,7 +29,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) @@ -41,7 +41,7 @@ STAGE PLANS: Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) @@ -135,7 +135,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) @@ -147,7 +147,7 @@ STAGE PLANS: Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) diff --git a/ql/src/test/results/clientpositive/ppd_outer_join2.q.out b/ql/src/test/results/clientpositive/ppd_outer_join2.q.out index 311ec8c..dbcc587 100644 --- a/ql/src/test/results/clientpositive/ppd_outer_join2.q.out +++ b/ql/src/test/results/clientpositive/ppd_outer_join2.q.out @@ -29,7 +29,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((key > '15') and (key < '25')) (type: boolean) @@ -41,7 +41,7 @@ STAGE PLANS: Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((key > '15') and (key < '25')) (type: boolean) @@ -255,7 +255,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((key > '15') and (key < '25')) (type: boolean) @@ -267,7 +267,7 @@ STAGE PLANS: Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((key > '15') and (key < '25')) (type: boolean) diff --git a/ql/src/test/results/clientpositive/ppd_outer_join3.q.out b/ql/src/test/results/clientpositive/ppd_outer_join3.q.out index ff84e9f..a58289a 100644 --- a/ql/src/test/results/clientpositive/ppd_outer_join3.q.out +++ b/ql/src/test/results/clientpositive/ppd_outer_join3.q.out @@ -29,7 +29,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -38,7 +38,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) TableScan - 
alias: a + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -249,7 +249,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -258,7 +258,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) diff --git a/ql/src/test/results/clientpositive/ppd_outer_join4.q.out b/ql/src/test/results/clientpositive/ppd_outer_join4.q.out index e52872f..4289f58 100644 --- a/ql/src/test/results/clientpositive/ppd_outer_join4.q.out +++ b/ql/src/test/results/clientpositive/ppd_outer_join4.q.out @@ -35,7 +35,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (sqrt(key) <> 13) (type: boolean) @@ -47,7 +47,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) TableScan - alias: c + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (sqrt(key) <> 13) (type: boolean) @@ -57,8 +57,9 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) TableScan - alias: a + alias: c Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (sqrt(key) <> 13) (type: boolean) @@ -68,7 +69,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -390,7 +390,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (sqrt(key) <> 13) (type: boolean) @@ -402,7 +402,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) TableScan - alias: c + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (sqrt(key) <> 13) (type: boolean) @@ -412,8 +412,9 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) TableScan - alias: a + alias: c Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (sqrt(key) <> 13) (type: boolean) @@ -423,7 +424,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) Reduce Operator Tree: Join Operator condition map: diff --git 
a/ql/src/test/results/clientpositive/ppd_outer_join5.q.out b/ql/src/test/results/clientpositive/ppd_outer_join5.q.out index 16d788b..9efaf4f 100644 --- a/ql/src/test/results/clientpositive/ppd_outer_join5.q.out +++ b/ql/src/test/results/clientpositive/ppd_outer_join5.q.out @@ -43,14 +43,15 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: t3 + alias: t1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator predicate: (id = 20) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: 20 (type: int) + key expressions: id (type: int) sort order: + + Map-reduce partition columns: id (type: int) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: key (type: string), value (type: string) TableScan @@ -66,15 +67,14 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: key (type: string), value (type: string) TableScan - alias: t1 + alias: t3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator predicate: (id = 20) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: id (type: int) + key expressions: 20 (type: int) sort order: + - Map-reduce partition columns: id (type: int) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: key (type: string), value (type: string) Reduce Operator Tree: @@ -119,15 +119,14 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: t3 + alias: t1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator predicate: (id = 20) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: id (type: int) + key expressions: 20 (type: int) sort order: + - Map-reduce partition columns: id (type: int) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: key (type: string), value (type: string) TableScan @@ -142,14 +141,15 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: key (type: string), value (type: string) TableScan - alias: t1 + alias: t3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator predicate: (id = 20) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: 20 (type: int) + key expressions: id (type: int) sort order: + + Map-reduce partition columns: id (type: int) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: key (type: string), value (type: string) Reduce Operator Tree: @@ -194,15 +194,14 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: t3 + alias: t1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator predicate: (id = 20) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: id (type: int) + key expressions: 20 (type: int) sort order: + - Map-reduce partition columns: id (type: int) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: key (type: string), value (type: string) TableScan @@ -217,14 +216,15 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 
Basic stats: NONE Column stats: NONE value expressions: key (type: string), value (type: string) TableScan - alias: t1 + alias: t3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator predicate: (id = 20) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: 20 (type: int) + key expressions: id (type: int) sort order: + + Map-reduce partition columns: id (type: int) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: key (type: string), value (type: string) Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/ppd_random.q.out b/ql/src/test/results/clientpositive/ppd_random.q.out index f21054a..e9322c2 100644 --- a/ql/src/test/results/clientpositive/ppd_random.q.out +++ b/ql/src/test/results/clientpositive/ppd_random.q.out @@ -28,39 +28,39 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key > '2') (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + predicate: (key > '2') (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -121,33 +121,33 
@@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key > '2') (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + predicate: (key > '2') (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: diff --git a/ql/src/test/results/clientpositive/ppd_repeated_alias.q.out b/ql/src/test/results/clientpositive/ppd_repeated_alias.q.out index e8822b1..076ea83 100644 --- a/ql/src/test/results/clientpositive/ppd_repeated_alias.q.out +++ b/ql/src/test/results/clientpositive/ppd_repeated_alias.q.out @@ -45,22 +45,22 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: foo (type: int) sort order: + Map-reduce partition columns: foo (type: int) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: bar (type: int) TableScan - alias: a + alias: b Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: foo (type: int) sort order: + Map-reduce partition columns: foo (type: int) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: bar (type: int) Reduce Operator Tree: Join Operator condition map: @@ -116,22 +116,22 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: foo (type: int) sort order: + Map-reduce partition columns: foo (type: int) Statistics: Num rows: 0 Data 
size: 0 Basic stats: NONE Column stats: NONE - value expressions: bar (type: int) TableScan - alias: a + alias: b Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: foo (type: int) sort order: + Map-reduce partition columns: foo (type: int) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: bar (type: int) Reduce Operator Tree: Join Operator condition map: @@ -187,10 +187,10 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: foo is not null (type: boolean) + predicate: (foo is not null and (bar = 3)) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: foo (type: int) @@ -198,10 +198,10 @@ STAGE PLANS: Map-reduce partition columns: foo (type: int) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE TableScan - alias: a + alias: b Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: (foo is not null and (bar = 3)) (type: boolean) + predicate: foo is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: foo (type: int) @@ -235,7 +235,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[9][tables = [c, d]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[9][tables = [c, d]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: -- Q4: here, the filter c.bar should be created under the first join but above the second explain select c.foo, d.bar from (select c.foo, b.bar, c.blah from pokes c left outer join pokes b on c.foo=b.foo) c left outer join pokes d where d.foo=1 and c.bar=2 PREHOOK: type: QUERY @@ -243,31 +243,31 @@ POSTHOOK: query: -- Q4: here, the filter c.bar should be created under the first explain select c.foo, d.bar from (select c.foo, b.bar, c.blah from pokes c left outer join pokes b on c.foo=b.foo) c left outer join pokes d where d.foo=1 and c.bar=2 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - alias: b + alias: c Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: foo (type: int) sort order: + Map-reduce partition columns: foo (type: int) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: bar (type: int) TableScan - alias: c + alias: b Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: foo (type: int) sort order: + Map-reduce partition columns: foo (type: int) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: bar (type: int) Reduce Operator Tree: Join Operator condition map: @@ -291,21 +291,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan - alias: d - Statistics: Num rows: 0 Data size: 0 Basic stats: 
NONE Column stats: NONE Reduce Output Operator sort order: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: foo (type: int), bar (type: int) + value expressions: _col0 (type: int) TableScan + alias: d + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator sort order: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col0 (type: int) + value expressions: foo (type: int), bar (type: int) Reduce Operator Tree: Join Operator condition map: diff --git a/ql/src/test/results/clientpositive/ppd_transform.q.out b/ql/src/test/results/clientpositive/ppd_transform.q.out index f12ff07..0be845f 100644 --- a/ql/src/test/results/clientpositive/ppd_transform.q.out +++ b/ql/src/test/results/clientpositive/ppd_transform.q.out @@ -52,17 +52,13 @@ STAGE PLANS: Filter Operator predicate: (_col0 < 100) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -388,31 +384,23 @@ STAGE PLANS: Filter Operator predicate: ((_col0 = 'a') or (_col0 = 'b')) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Filter Operator predicate: ((_col0 = 'c') or (_col0 = 'd')) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator diff --git a/ql/src/test/results/clientpositive/ppd_udf_case.q.out b/ql/src/test/results/clientpositive/ppd_udf_case.q.out index 1826993..1ac9368 100644 --- a/ql/src/test/results/clientpositive/ppd_udf_case.q.out +++ b/ql/src/test/results/clientpositive/ppd_udf_case.q.out @@ -34,7 +34,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key is not null and CASE (key) WHEN ('27') THEN (true) WHEN ('38') THEN (false) ELSE (null) END) (type: boolean) @@ -46,7 +46,7 @@ STAGE PLANS: Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string), hr (type: string) TableScan - alias: a + alias: b Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key is not null and CASE (key) WHEN ('27') THEN (true) WHEN ('38') THEN (false) ELSE (null) END) (type: boolean) @@ -179,7 +179,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key is not null and CASE (key) WHEN ('27') THEN (true) WHEN ('38') THEN (false) ELSE (null) END) (type: boolean) @@ -191,7 +191,7 @@ STAGE PLANS: Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string), hr (type: string) TableScan - alias: a + alias: b Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key is not null and CASE (key) WHEN ('27') THEN (true) WHEN ('38') THEN (false) ELSE (null) END) (type: boolean) diff --git a/ql/src/test/results/clientpositive/ppd_union_view.q.out b/ql/src/test/results/clientpositive/ppd_union_view.q.out index d5863ef..f7410e2 100644 --- a/ql/src/test/results/clientpositive/ppd_union_view.q.out +++ b/ql/src/test/results/clientpositive/ppd_union_view.q.out @@ -160,45 +160,45 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-2 depends on stages: Stage-3 Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan - alias: t1_mapping - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + alias: t1_old + Statistics: Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: keymap is not null (type: boolean) - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: keymap (type: string), '2011-10-13' (type: string) sort order: ++ Map-reduce partition columns: keymap (type: string) - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: key (type: string) + Statistics: Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) auto parallelism: false TableScan - alias: t1_old - Statistics: Num rows: 1 Data size: 14 Basic stats: 
COMPLETE Column stats: NONE + alias: t1_mapping + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: keymap is not null (type: boolean) - Statistics: Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: keymap (type: string), '2011-10-13' (type: string) sort order: ++ Map-reduce partition columns: keymap (type: string) - Statistics: Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: key (type: string) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -458,12 +458,12 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-2 depends on stages: Stage-3 Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Map Reduce Needs Tagging: true Reduce Operator Tree: @@ -501,35 +501,6 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - GatherStats: false - Union - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), '2011-10-15' (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types string:string:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - TableScan alias: t1_new Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE GatherStats: false @@ -564,6 +535,35 @@ STAGE PLANS: TotalFiles: 1 GatherStats: false MultiFileSpray: false + TableScan + GatherStats: false + Union + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2011-10-15' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: diff --git a/ql/src/test/results/clientpositive/ppd_vc.q.out b/ql/src/test/results/clientpositive/ppd_vc.q.out index 521bc8d..9622faa 100644 --- a/ql/src/test/results/clientpositive/ppd_vc.q.out +++ b/ql/src/test/results/clientpositive/ppd_vc.q.out @@ -409,21 +409,6 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: false - TableScan alias: srcpart Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE GatherStats: false @@ -443,6 +428,21 @@ STAGE PLANS: tag: 1 value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: bigint) auto parallelism: false + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: diff --git a/ql/src/test/results/clientpositive/ql_rewrite_gbtoidx.q.out b/ql/src/test/results/clientpositive/ql_rewrite_gbtoidx.q.out index 2f77de9..fdc1dc6 100644 --- a/ql/src/test/results/clientpositive/ql_rewrite_gbtoidx.q.out +++ b/ql/src/test/results/clientpositive/ql_rewrite_gbtoidx.q.out @@ -459,17 +459,13 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 60 Data size: 6049 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 60 Data size: 6049 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -478,16 +474,16 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ - Statistics: Num rows: 60 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + 
Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 60 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 60 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 120 Data size: 12099 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -608,17 +604,13 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 47 Data size: 4432 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 47 Data size: 4432 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 95 Data size: 8960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -627,16 +619,16 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ - Statistics: Num rows: 47 Data size: 4432 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 95 Data size: 8960 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 47 Data size: 4432 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 95 Data size: 8960 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 47 Data size: 4432 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 95 Data size: 8960 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -907,12 +899,12 @@ select l_shipdate, l_orderkey as cnt from lineitem_ix) dummy POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-2 depends on stages: Stage-3 Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -941,35 +933,17 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 47 Data size: 4432 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - 
outputColumnNames: _col0, _col1 - Statistics: Num rows: 47 Data size: 4432 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce Map Operator Tree: TableScan - Union - Statistics: Num rows: 163 Data size: 16531 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 163 Data size: 16531 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 163 Data size: 16531 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TableScan alias: lineitem_ix Statistics: Num rows: 116 Data size: 12099 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -989,6 +963,20 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TableScan + Union + Statistics: Num rows: 163 Data size: 16531 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 163 Data size: 16531 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 163 Data size: 16531 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1962,20 +1950,16 @@ STAGE PLANS: Filter Operator predicate: (value = key) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: int) - outputColumnNames: key, value + Group By Operator + keys: key (type: int), value (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - keys: key (type: int), value (type: int) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int), KEY._col1 (type: int) @@ -2018,20 +2002,16 @@ STAGE PLANS: Filter Operator predicate: (value = key) (type: boolean) 
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: int) - outputColumnNames: key, value + Group By Operator + keys: key (type: int), substr(value, 2, 3) (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - keys: key (type: int), substr(value, 2, 3) (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int), KEY._col1 (type: string) @@ -2268,22 +2248,18 @@ STAGE PLANS: Filter Operator predicate: (key < 10) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: key + Group By Operator + aggregations: count(key) + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(key) - keys: key (type: int) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -2497,17 +2473,13 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 17 Data size: 70 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -2516,16 +2488,16 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + - Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17 Data size: 70 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: 
bigint) Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17 Data size: 70 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17 Data size: 70 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2589,17 +2561,13 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 215 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 215 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 6 Data size: 430 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -2608,16 +2576,16 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + - Statistics: Num rows: 3 Data size: 215 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 430 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 215 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 430 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 3 Data size: 215 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 430 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git a/ql/src/test/results/clientpositive/quotedid_skew.q.out b/ql/src/test/results/clientpositive/quotedid_skew.q.out index 95ca950..86e379e 100644 --- a/ql/src/test/results/clientpositive/quotedid_skew.q.out +++ b/ql/src/test/results/clientpositive/quotedid_skew.q.out @@ -60,7 +60,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (!@#$%^&*()_q is not null and (not (!@#$%^&*()_q = '2'))) (type: boolean) + predicate: (!@#$%^&*()_q is not null and (!@#$%^&*()_q = '2')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: !@#$%^&*()_q (type: string) @@ -72,7 +72,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (!@#$%^&*()_q is not null and (not (!@#$%^&*()_q = '2'))) (type: 
boolean) + predicate: (!@#$%^&*()_q is not null and (!@#$%^&*()_q = '2')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: !@#$%^&*()_q (type: string) @@ -106,29 +106,23 @@ STAGE PLANS: TableScan Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TableScan Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-4 Map Reduce @@ -137,7 +131,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (!@#$%^&*()_q is not null and (!@#$%^&*()_q = '2')) (type: boolean) + predicate: (!@#$%^&*()_q is not null and (not (!@#$%^&*()_q = '2'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: !@#$%^&*()_q (type: string) @@ -149,7 +143,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (!@#$%^&*()_q is not null and (!@#$%^&*()_q = '2')) (type: boolean) + predicate: (!@#$%^&*()_q is not null and (not (!@#$%^&*()_q = '2'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: !@#$%^&*()_q (type: string) diff --git a/ql/src/test/results/clientpositive/reduce_deduplicate_exclude_join.q.out b/ql/src/test/results/clientpositive/reduce_deduplicate_exclude_join.q.out index fae0b83..5e878f6 100644 --- a/ql/src/test/results/clientpositive/reduce_deduplicate_exclude_join.q.out +++ b/ql/src/test/results/clientpositive/reduce_deduplicate_exclude_join.q.out @@ -3,13 +3,13 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select * from (select * from src cluster by key) a join src b on a.key = b.key limit 1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-4 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-4 - Stage-0 depends on stages: Stage-3 + Stage-1 is a root stage + Stage-5 depends on stages: Stage-1 + Stage-4 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-4 STAGE 
PLANS: - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -40,7 +40,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-4 + Stage: Stage-5 Map Reduce Local Work Alias -> Map Local Tables: b @@ -62,7 +62,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 key (type: string) - Stage: Stage-3 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan diff --git a/ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out b/ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out index 58757d4..b8ea616 100644 --- a/ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out +++ b/ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out @@ -243,28 +243,28 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: src1 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: src1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -274,22 +274,18 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + Group By Operator + aggregations: sum(_col0) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col0) - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -342,28 +338,28 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: src1 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: src1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -373,16 +369,12 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -510,27 +502,23 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) + Group By Operator + aggregations: count(DISTINCT _col1) + keys: _col0 (type: string) + mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(DISTINCT _col1) - keys: _col0 (type: string) - mode: complete + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 125 Data size: 1328 
Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -2742,28 +2730,28 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: src1 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: src1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -2773,16 +2761,12 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -2834,28 +2818,28 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: src1 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: 
src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: src1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -2865,16 +2849,12 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -2992,27 +2972,23 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) + Group By Operator + aggregations: count(DISTINCT _col1) + keys: _col0 (type: string) + mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(DISTINCT _col1) - keys: _col0 (type: string) - mode: complete + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: 
NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/regex_col.q.out b/ql/src/test/results/clientpositive/regex_col.q.out index abb6aa7..7087f42 100644 --- a/ql/src/test/results/clientpositive/regex_col.q.out +++ b/ql/src/test/results/clientpositive/regex_col.q.out @@ -84,7 +84,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key is not null and value is not null) (type: boolean) @@ -96,7 +96,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: ds (type: string), hr (type: string) TableScan - alias: a + alias: b Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key is not null and value is not null) (type: boolean) @@ -154,7 +154,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key = 103) (type: boolean) @@ -165,7 +165,7 @@ STAGE PLANS: Map-reduce partition columns: hr (type: string), ds (type: string) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE TableScan - alias: a + alias: b Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key = 103) (type: boolean) diff --git a/ql/src/test/results/clientpositive/router_join_ppr.q.out b/ql/src/test/results/clientpositive/router_join_ppr.q.out index 95c51ba..b89d92a 100644 --- a/ql/src/test/results/clientpositive/router_join_ppr.q.out +++ b/ql/src/test/results/clientpositive/router_join_ppr.q.out @@ -113,36 +113,36 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: ((key > 15) and (key < 25)) (type: boolean) - Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string), ds (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) auto parallelism: false TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: ((key > 15) and (key < 25)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 
Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) + Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string), ds (type: string) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -585,35 +585,35 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: ((key > 15) and (key < 25)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - tag: 1 + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + tag: 0 value expressions: value (type: string) auto parallelism: false TableScan - alias: a - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: ((key > 15) and (key < 25)) (type: boolean) - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - tag: 0 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + tag: 1 value expressions: value (type: string) auto parallelism: false Path -> Alias: @@ -954,35 +954,35 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: ((key > 15) and (key < 25)) (type: boolean) - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - tag: 1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + tag: 0 value expressions: value (type: string) auto parallelism: false TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter 
Operator isSamplingPred: false predicate: ((key > 15) and (key < 25)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - tag: 0 + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + tag: 1 value expressions: value (type: string) auto parallelism: false Path -> Alias: @@ -1323,36 +1323,36 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: ((key > 15) and (key < 25)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string) + Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string), ds (type: string) auto parallelism: false TableScan - alias: a - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: ((key > 15) and (key < 25)) (type: boolean) - Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string), ds (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) auto parallelism: false Path -> Alias: #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/sample10.q.out b/ql/src/test/results/clientpositive/sample10.q.out index 6de9ea8..fbccc0a 100644 --- a/ql/src/test/results/clientpositive/sample10.q.out +++ b/ql/src/test/results/clientpositive/sample10.q.out @@ -323,28 +323,24 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 60 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 60 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 
- columns.types string,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Statistics: Num rows: 20 Data size: 120 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types string,bigint + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Map Reduce @@ -354,7 +350,7 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 10 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 120 Basic stats: COMPLETE Column stats: NONE tag: -1 value expressions: _col1 (type: bigint) auto parallelism: false @@ -388,13 +384,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 120 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 10 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 120 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git a/ql/src/test/results/clientpositive/sample8.q.out b/ql/src/test/results/clientpositive/sample8.q.out index f42c2d4..df76d4d 100644 --- a/ql/src/test/results/clientpositive/sample8.q.out +++ b/ql/src/test/results/clientpositive/sample8.q.out @@ -93,34 +93,34 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: t - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + alias: s + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: true - predicate: (((((hash(key) & 2147483647) % 1) = 0) and value is not null) and (((hash(key) & 2147483647) % 10) = 0)) (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + predicate: (((((hash(key) & 2147483647) % 10) = 0) and value is not null) and (((hash(key) & 2147483647) % 1) = 0)) (type: boolean) + Statistics: Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string), value (type: string) sort order: ++ Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: 1 + Statistics: Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE + tag: 0 auto parallelism: false TableScan - alias: s - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: t + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: 
NONE GatherStats: false Filter Operator isSamplingPred: true - predicate: (((((hash(key) & 2147483647) % 10) = 0) and value is not null) and (((hash(key) & 2147483647) % 1) = 0)) (type: boolean) - Statistics: Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE + predicate: (((((hash(key) & 2147483647) % 1) = 0) and value is not null) and (((hash(key) & 2147483647) % 10) = 0)) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string), value (type: string) sort order: ++ Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE - tag: 0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: 1 auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -310,7 +310,7 @@ STAGE PLANS: name: default.srcpart name: default.srcpart Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [t, s] + /srcpart/ds=2008-04-08/hr=11 [s, t] /srcpart/ds=2008-04-08/hr=12 [t] /srcpart/ds=2008-04-09/hr=11 [t] /srcpart/ds=2008-04-09/hr=12 [t] @@ -749,30 +749,30 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b - Row Limit Per Split: 10 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: a + Row Limit Per Split: 100 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) TableScan - alias: a - Row Limit Per Split: 100 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: b + Row Limit Per Split: 10 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Reduce Operator Tree: Join Operator @@ -832,30 +832,30 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b - Row Limit Per Split: 10 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: a + Row Limit Per Split: 100 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce 
partition columns: key (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) TableScan - alias: a - Row Limit Per Split: 100 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: b + Row Limit Per Split: 10 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Reduce Operator Tree: Join Operator diff --git a/ql/src/test/results/clientpositive/sample_islocalmode_hook.q.out b/ql/src/test/results/clientpositive/sample_islocalmode_hook.q.out index 39de7fe..71b46e0 100644 --- a/ql/src/test/results/clientpositive/sample_islocalmode_hook.q.out +++ b/ql/src/test/results/clientpositive/sample_islocalmode_hook.q.out @@ -1,11 +1,11 @@ -PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) -- create file inputs create table sih_i_part (key int, value string) partitioned by (p string) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@sih_i_part -POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) -- create file inputs create table sih_i_part (key int, value string) partitioned by (p string) diff --git a/ql/src/test/results/clientpositive/script_pipe.q.out b/ql/src/test/results/clientpositive/script_pipe.q.out index cde37a6..e19fc06 100644 --- a/ql/src/test/results/clientpositive/script_pipe.q.out +++ b/ql/src/test/results/clientpositive/script_pipe.q.out @@ -34,24 +34,20 @@ STAGE PLANS: Limit Number of rows: 1 Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + Transform Operator + command: true + output info: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - Transform Operator - command: true - output info: + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/select_transform_hint.q.out b/ql/src/test/results/clientpositive/select_transform_hint.q.out index 859c792..a15d104 100644 --- a/ql/src/test/results/clientpositive/select_transform_hint.q.out +++ b/ql/src/test/results/clientpositive/select_transform_hint.q.out @@ -58,24 +58,20 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + Transform Operator + command: cat + output info: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Transform Operator - command: cat - output info: + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work @@ -1148,7 +1144,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -1158,8 +1154,9 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -1169,7 +1166,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1179,24 +1175,20 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + Transform Operator + command: cat + output info: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Transform Operator - command: cat - output info: + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: input format: 
org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/semijoin.q.out b/ql/src/test/results/clientpositive/semijoin.q.out index 920b2c7..45e9c1f 100644 --- a/ql/src/test/results/clientpositive/semijoin.q.out +++ b/ql/src/test/results/clientpositive/semijoin.q.out @@ -128,6 +128,18 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + TableScan alias: b Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -147,18 +159,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - TableScan - alias: a - Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -168,16 +168,12 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1 Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -236,6 +232,18 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce 
partition columns: key (type: int) + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + TableScan alias: b Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -255,18 +263,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE - TableScan - alias: a - Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -276,16 +272,12 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1 Statistics: Num rows: 6 Data size: 49 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 49 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -346,6 +338,18 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + TableScan alias: b Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator @@ -365,18 +369,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - TableScan - alias: a - Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -386,16 +378,12 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1 Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 
47 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -448,26 +436,6 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 15) (type: boolean) - Statistics: Num rows: 7 Data size: 51 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: key - Statistics: Num rows: 7 Data size: 51 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: key (type: int), key (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 51 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 7 Data size: 51 Basic stats: COMPLETE Column stats: NONE - TableScan alias: a Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -479,6 +447,22 @@ STAGE PLANS: Map-reduce partition columns: key (type: int) Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) + TableScan + alias: b + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 15) (type: boolean) + Statistics: Num rows: 7 Data size: 51 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: int), key (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 51 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 7 Data size: 51 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -561,6 +545,18 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + TableScan alias: b Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -580,18 +576,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE - TableScan - alias: a - Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE 
- Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -601,16 +585,12 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1 Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -666,18 +646,6 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - TableScan alias: t3 Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -697,6 +665,18 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 7 Data size: 51 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: a + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -913,16 +893,12 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1 Statistics: Num rows: 6 Data size: 49 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 49 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -1027,14 +1003,10 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0 Statistics: Num 
rows: 12 Data size: 89 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + Statistics: Num rows: 12 Data size: 89 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 12 Data size: 89 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -1099,6 +1071,18 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + TableScan alias: b Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -1118,18 +1102,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: (2 * _col0) (type: int) Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - TableScan - alias: a - Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1139,16 +1111,12 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1 Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -1205,6 +1173,18 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + TableScan alias: b Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -1236,18 +1216,6 @@ STAGE PLANS: sort order: + Map-reduce partition 
columns: _col0 (type: int) Statistics: Num rows: 11 Data size: 81 Basic stats: COMPLETE Column stats: NONE - TableScan - alias: a - Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1338,6 +1306,17 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 6 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: int), value (type: string) + Statistics: Num rows: 6 Data size: 44 Basic stats: COMPLETE Column stats: NONE + TableScan alias: b Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -1357,17 +1336,6 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: string) Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - TableScan - alias: a - Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 6 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: int), value (type: string) - Statistics: Num rows: 6 Data size: 44 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -1377,16 +1345,12 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1 Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -1533,14 +1497,10 @@ STAGE PLANS: 2 _col0 (type: int) outputColumnNames: _col0 Statistics: Num rows: 24 Data size: 178 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + Statistics: Num rows: 24 Data size: 178 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 24 Data size: 178 Basic stats: 
COMPLETE Column stats: NONE Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -1602,6 +1562,14 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + TableScan alias: b Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -1626,14 +1594,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE - TableScan - alias: a - Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -1645,16 +1605,12 @@ STAGE PLANS: 2 outputColumnNames: _col0 Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -1735,6 +1691,14 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + TableScan alias: b Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -1759,14 +1723,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE - TableScan - alias: a - Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -1778,16 +1734,12 @@ STAGE PLANS: 2 outputColumnNames: _col0 Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -1871,6 +1823,14 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + TableScan alias: b Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -1895,14 +1855,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE - TableScan - alias: a - Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -1914,16 +1866,12 @@ STAGE PLANS: 2 outputColumnNames: _col0 Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -2007,6 +1955,14 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + TableScan alias: b Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -2031,14 +1987,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: int) Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - TableScan - alias: a - Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -2050,16 +1998,12 @@ STAGE PLANS: 2 outputColumnNames: _col0 Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE 
Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -2143,6 +2087,14 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + TableScan alias: b Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -2167,14 +2119,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: int) Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - TableScan - alias: a - Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -2186,16 +2130,12 @@ STAGE PLANS: 2 outputColumnNames: _col0 Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -2281,6 +2221,14 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + TableScan alias: b Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -2305,14 +2253,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: int) Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE - TableScan - alias: a - Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -2324,16 +2264,12 @@ STAGE PLANS: 2 outputColumnNames: _col0 Statistics: 
Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -2431,6 +2367,18 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 81 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 11 Data size: 81 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + TableScan alias: b Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -2450,18 +2398,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - TableScan - alias: a - Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 81 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 81 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -2505,16 +2441,12 @@ STAGE PLANS: 1 outputColumnNames: _col0 Statistics: Num rows: 13 Data size: 97 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 97 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Map Reduce @@ -2603,6 +2535,18 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (value is not null and (key > 100)) (type: boolean) + Statistics: Num rows: 3 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 3 Data size: 22 Basic stats: COMPLETE 
Column stats: NONE + value expressions: key (type: int) + TableScan alias: b Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -2622,18 +2566,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - TableScan - alias: a - Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (value is not null and (key > 100)) (type: boolean) - Statistics: Num rows: 3 Data size: 22 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 3 Data size: 22 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: int) Reduce Operator Tree: Join Operator condition map: diff --git a/ql/src/test/results/clientpositive/skewjoin.q.out b/ql/src/test/results/clientpositive/skewjoin.q.out index deb0a8f..c9a0e82 100644 --- a/ql/src/test/results/clientpositive/skewjoin.q.out +++ b/ql/src/test/results/clientpositive/skewjoin.q.out @@ -91,7 +91,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -101,9 +101,8 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) TableScan - alias: src1 + alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -113,6 +112,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -238,7 +238,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: d + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -274,7 +274,7 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val (type: string) TableScan - alias: a + alias: d Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -358,7 +358,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: d + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -394,7 +394,7 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val (type: string) TableScan - alias: a + alias: d Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -471,18 +471,6 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key + 1) is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key 
expressions: (key + 1) (type: double) - sort order: + - Map-reduce partition columns: (key + 1) (type: double) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: string) - TableScan alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator @@ -494,6 +482,18 @@ STAGE PLANS: Map-reduce partition columns: UDFToDouble(key) (type: double) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: key (type: string), val (type: string) + TableScan + alias: c + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key + 1) is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: (key + 1) (type: double) + sort order: + + Map-reduce partition columns: (key + 1) (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) Reduce Operator Tree: Join Operator condition map: @@ -503,21 +503,17 @@ STAGE PLANS: 1 {VALUE._col0} outputColumnNames: _col0, _col1, _col5 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) - outputColumnNames: _col0, _col1, _col5 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col5)) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col5)) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -629,21 +625,17 @@ STAGE PLANS: handleSkewJoin: true outputColumnNames: _col2, _col3 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col3 (type: string) - outputColumnNames: _col2, _col3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2)), sum(hash(_col3)) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Group By Operator + aggregations: sum(hash(_col2)), sum(hash(_col3)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE 
Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-5 Conditional Operator @@ -679,19 +671,16 @@ STAGE PLANS: 0 reducesinkkey0 (type: string) 1 reducesinkkey0 (type: string) outputColumnNames: _col2, _col3 - Select Operator - expressions: _col2 (type: string), _col3 (type: string) - outputColumnNames: _col2, _col3 - Group By Operator - aggregations: sum(hash(_col2)), sum(hash(_col3)) - mode: hash - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Group By Operator + aggregations: sum(hash(_col2)), sum(hash(_col3)) + mode: hash + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -813,21 +802,17 @@ STAGE PLANS: handleSkewJoin: true outputColumnNames: _col2, _col3 Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col3 (type: string) - outputColumnNames: _col2, _col3 - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2)), sum(hash(_col3)) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Group By Operator + aggregations: sum(hash(_col2)), sum(hash(_col3)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-5 Conditional Operator @@ -863,19 +848,16 @@ STAGE PLANS: 0 reducesinkkey0 (type: string), reducesinkkey1 (type: double) 1 reducesinkkey0 (type: string), reducesinkkey1 (type: double) outputColumnNames: _col2, _col3 - Select Operator - expressions: _col2 (type: string), _col3 (type: string) - outputColumnNames: _col2, _col3 - Group By Operator - aggregations: sum(hash(_col2)), sum(hash(_col3)) - mode: hash - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Group By Operator + aggregations: sum(hash(_col2)), sum(hash(_col3)) + mode: hash + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -970,33 +952,33 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((key < 100) and (key < 80)) and key is not null) (type: boolean) - Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + predicate: ((key < 80) and (key < 100)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key < 80) and (key < 100)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + predicate: (((key < 100) and (key < 80)) and key is not null) (type: boolean) + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -1024,21 +1006,17 @@ STAGE PLANS: handleSkewJoin: true outputColumnNames: _col0, _col3 Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col3 - Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col3)) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col3)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: 
NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-7 Conditional Operator @@ -1093,19 +1071,16 @@ STAGE PLANS: 1 reducesinkkey0 (type: string) 2 reducesinkkey0 (type: string) outputColumnNames: _col0, _col3 - Select Operator - expressions: _col0 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col3 - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col3)) - mode: hash - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col3)) + mode: hash + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -1185,19 +1160,16 @@ STAGE PLANS: 1 reducesinkkey0 (type: string) 2 reducesinkkey0 (type: string) outputColumnNames: _col0, _col3 - Select Operator - expressions: _col0 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col3 - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col3)) - mode: hash - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col3)) + mode: hash + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -1280,19 +1252,15 @@ STAGE PLANS: 1 UDFToDouble(key) (type: double) outputColumnNames: _col0, _col6 Statistics: Num rows: 0 Data size: 33 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col6 - Statistics: Num rows: 0 Data size: 33 Basic stats: PARTIAL Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col6)) - mode: hash - outputColumnNames: _col0, _col1 + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col6)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint) + value expressions: _col0 (type: bigint), _col1 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/skewjoin_mapjoin1.q.out 
b/ql/src/test/results/clientpositive/skewjoin_mapjoin1.q.out index b70d2a8..01e2bef 100644 --- a/ql/src/test/results/clientpositive/skewjoin_mapjoin1.q.out +++ b/ql/src/test/results/clientpositive/skewjoin_mapjoin1.q.out @@ -102,7 +102,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean) + predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Map Join Operator condition map: @@ -121,21 +121,18 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TableScan alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean) + predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Map Join Operator condition map: @@ -154,16 +151,13 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work @@ -253,7 +247,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (not ((key = '2') or (key = '3'))) (type: boolean) + predicate: ((key = '2') or (key = '3')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Map Join Operator condition map: @@ -272,21 +266,18 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false 
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TableScan alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((key = '2') or (key = '3')) (type: boolean) + predicate: (not ((key = '2') or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Map Join Operator condition map: @@ -305,16 +296,13 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work @@ -406,7 +394,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean) + predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Map Join Operator condition map: @@ -418,27 +406,22 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator + Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Union - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) TableScan alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean) + predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean) Statistics: Num rows: 0 Data 
size: 0 Basic stats: NONE Column stats: NONE Map Join Operator condition map: @@ -450,22 +433,17 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator + Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Union - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -561,7 +539,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (not ((key = '2') or (key = '3'))) (type: boolean) + predicate: ((key = '2') or (key = '3')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Map Join Operator condition map: @@ -573,27 +551,22 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator + Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Union - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) TableScan alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((key = '2') or (key = '3')) (type: boolean) + predicate: (not ((key = '2') or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Map Join Operator condition map: @@ -605,22 +578,17 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator + Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Union - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count(1) 
- mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/skewjoin_mapjoin10.q.out b/ql/src/test/results/clientpositive/skewjoin_mapjoin10.q.out index 3fc88cf..3329e73 100644 --- a/ql/src/test/results/clientpositive/skewjoin_mapjoin10.q.out +++ b/ql/src/test/results/clientpositive/skewjoin_mapjoin10.q.out @@ -142,8 +142,8 @@ STAGE PLANS: alias: b Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and (not ((key = 2) or (key = 3)))) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + predicate: (key is not null and ((key = 2) or (key = 3))) (type: boolean) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -154,29 +154,26 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE Union Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - SELECT * : (no compute) + File Output Operator + compressed: false Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TableScan alias: b Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and ((key = 2) or (key = 3))) (type: boolean) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + predicate: (key is not null and (not ((key = 2) or (key = 3)))) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -187,23 +184,20 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 
4 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Union Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - SELECT * : (no compute) + File Output Operator + compressed: false Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work @@ -293,8 +287,8 @@ STAGE PLANS: alias: b Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (not ((key = 2) or (key = 3))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + predicate: ((key = 2) or (key = 3)) (type: boolean) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Right Outer Join0 to 1 @@ -305,29 +299,26 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE Union Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE - Select Operator - SELECT * : (no compute) + File Output Operator + compressed: false Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TableScan alias: b Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key = 2) or (key = 3)) (type: boolean) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + predicate: (not ((key = 2) or (key = 3))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Map Join Operator condition map: Right Outer Join0 to 1 @@ -338,23 +329,20 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0, _col1, _col5, 
_col6 - Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Union Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE - Select Operator - SELECT * : (no compute) + File Output Operator + compressed: false Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work @@ -446,8 +434,8 @@ STAGE PLANS: alias: b Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and (not ((key = 2) or (key = 3)))) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + predicate: (key is not null and ((key = 2) or (key = 3))) (type: boolean) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -457,29 +445,24 @@ STAGE PLANS: keys: 0 key (type: int) 1 key (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - SELECT * : (no compute) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Union + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) TableScan alias: b Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and ((key = 2) or (key = 3))) (type: boolean) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + predicate: (key is not null and (not ((key = 2) or (key = 3)))) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: 
Inner Join 0 to 1 @@ -489,23 +472,18 @@ STAGE PLANS: keys: 0 key (type: int) 1 key (type: int) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - SELECT * : (no compute) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Union + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -601,8 +579,8 @@ STAGE PLANS: alias: b Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (not ((key = 2) or (key = 3))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + predicate: ((key = 2) or (key = 3)) (type: boolean) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Right Outer Join0 to 1 @@ -612,29 +590,24 @@ STAGE PLANS: keys: 0 key (type: int) 1 key (type: int) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Union - Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE - Select Operator - SELECT * : (no compute) - Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Union + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) TableScan alias: b Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key = 2) or (key = 3)) (type: boolean) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + predicate: (not ((key = 2) or (key = 3))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Map Join Operator condition map: Right Outer Join0 to 1 @@ 
-644,23 +617,18 @@ STAGE PLANS: keys: 0 key (type: int) 1 key (type: int) - Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE - Select Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Union Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE - Select Operator - SELECT * : (no compute) - Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/skewjoin_mapjoin11.q.out b/ql/src/test/results/clientpositive/skewjoin_mapjoin11.q.out index ea180ec..cad15f4 100644 --- a/ql/src/test/results/clientpositive/skewjoin_mapjoin11.q.out +++ b/ql/src/test/results/clientpositive/skewjoin_mapjoin11.q.out @@ -106,7 +106,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (not (key = '2'))) (type: boolean) + predicate: (key is not null and (key = '2')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Map Join Operator condition map: @@ -125,21 +125,18 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TableScan alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (key = '2')) (type: boolean) + predicate: (key is not null and (not (key = '2'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Map Join Operator condition map: @@ -158,16 +155,13 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - 
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work diff --git a/ql/src/test/results/clientpositive/skewjoin_mapjoin2.q.out b/ql/src/test/results/clientpositive/skewjoin_mapjoin2.q.out index 6b66d6c..875b511 100644 --- a/ql/src/test/results/clientpositive/skewjoin_mapjoin2.q.out +++ b/ql/src/test/results/clientpositive/skewjoin_mapjoin2.q.out @@ -106,7 +106,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) + predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Map Join Operator condition map: @@ -125,21 +125,18 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TableScan alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) + predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Map Join Operator condition map: @@ -158,16 +155,13 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work @@ -219,7 +213,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL 
Column stats: NONE Filter Operator - predicate: (not (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) + predicate: (((key = '2') or (key = '8')) or (key = '3')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -231,7 +225,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (not (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) + predicate: (((key = '2') or (key = '8')) or (key = '3')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -265,29 +259,23 @@ STAGE PLANS: TableScan Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TableScan Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-4 Map Reduce @@ -296,7 +284,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (((key = '2') or (key = '8')) or (key = '3')) (type: boolean) + predicate: (not (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -308,7 +296,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (((key = '2') or (key = '8')) or (key = '3')) (type: boolean) + predicate: (not (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) diff --git a/ql/src/test/results/clientpositive/skewjoin_mapjoin3.q.out b/ql/src/test/results/clientpositive/skewjoin_mapjoin3.q.out index bd2c108..b749eee 100644 --- a/ql/src/test/results/clientpositive/skewjoin_mapjoin3.q.out +++ b/ql/src/test/results/clientpositive/skewjoin_mapjoin3.q.out @@ -106,7 +106,7 @@ STAGE PLANS: alias: b Statistics: 
Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) + predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Map Join Operator condition map: @@ -125,21 +125,18 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TableScan alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) + predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Map Join Operator condition map: @@ -158,16 +155,13 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work diff --git a/ql/src/test/results/clientpositive/skewjoin_mapjoin4.q.out b/ql/src/test/results/clientpositive/skewjoin_mapjoin4.q.out index ebe238e..8182f1f 100644 --- a/ql/src/test/results/clientpositive/skewjoin_mapjoin4.q.out +++ b/ql/src/test/results/clientpositive/skewjoin_mapjoin4.q.out @@ -164,7 +164,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) + predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Map Join Operator condition map: @@ -186,21 +186,18 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no 
compute) + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TableScan alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) + predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Map Join Operator condition map: @@ -222,16 +219,13 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work diff --git a/ql/src/test/results/clientpositive/skewjoin_mapjoin5.q.out b/ql/src/test/results/clientpositive/skewjoin_mapjoin5.q.out index 19f225d..f75fc4a 100644 --- a/ql/src/test/results/clientpositive/skewjoin_mapjoin5.q.out +++ b/ql/src/test/results/clientpositive/skewjoin_mapjoin5.q.out @@ -238,22 +238,18 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Group By Operator + aggregations: count(1) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -261,16 +257,12 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column 
stats: NONE
-          Select Operator
-            expressions: _col0 (type: string), _col1 (type: bigint)
-            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
   Stage: Stage-5
     Map Reduce Local Work
diff --git a/ql/src/test/results/clientpositive/skewjoin_mapjoin6.q.out b/ql/src/test/results/clientpositive/skewjoin_mapjoin6.q.out
index 9403c4f..4312415 100644
--- a/ql/src/test/results/clientpositive/skewjoin_mapjoin6.q.out
+++ b/ql/src/test/results/clientpositive/skewjoin_mapjoin6.q.out
@@ -104,8 +104,8 @@ STAGE PLANS:
           alias: b
           Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
           Filter Operator
-            predicate: (key is not null and (not (key = '8'))) (type: boolean)
-            Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            predicate: (key is not null and (key = '8')) (type: boolean)
+            Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
             Map Join Operator
               condition map:
                    Inner Join 0 to 1
@@ -116,23 +116,41 @@ STAGE PLANS:
                 0 key (type: string)
                 1 key (type: string)
               outputColumnNames: _col0, _col6
-              Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: _col0 (type: string), _col6 (type: array<string>)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
                 Union
                   Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    SELECT * : (no compute)
+                  Lateral View Forward
                     Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                    Lateral View Forward
+                    Select Operator
+                      SELECT * : (no compute)
+                      expressions: _col0 (type: string), _col1 (type: array<string>)
+                      outputColumnNames: org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc, org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc
                       Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                      Select Operator
-                        SELECT * : (no compute)
-                        expressions: _col0 (type: string), _col1 (type: array<string>)
-                        outputColumnNames: org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc, org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc
+                      Lateral View Join Operator
+                        outputColumnNames: _col0, _col1, _col2
+                        Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                        Select Operator
+                          expressions: _col0 (type: string), _col1 (type: array<string>), _col2 (type: string)
+                          outputColumnNames: _col0, _col1, _col2
+                          Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                          File Output Operator
+                            compressed: false
+                            Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                            table:
+                                input format: org.apache.hadoop.mapred.TextInputFormat
+                                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      Select Operator
+                        expressions: _col1 (type: array<string>)
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                        UDTF Operator
                           Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                          function name: explode
                           Lateral View Join Operator
                             outputColumnNames: _col0, _col1, _col2
                             Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
@@ -147,33 +165,12 @@ STAGE PLANS:
                               input format: org.apache.hadoop.mapred.TextInputFormat
                               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    Select Operator
-                      expressions: _col1 (type: array<string>)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                      UDTF Operator
-                        Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                        function name: explode
-                        Lateral View Join Operator
-                          outputColumnNames: _col0, _col1, _col2
-                          Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
-                          Select Operator
-                            expressions: _col0 (type: string), _col1 (type: array<string>), _col2 (type: string)
-                            outputColumnNames: _col0, _col1, _col2
-                            Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
-                            File Output Operator
-                              compressed: false
-                              Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
-                              table:
-                                  input format: org.apache.hadoop.mapred.TextInputFormat
-                                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
          TableScan
            alias: b
            Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
-             predicate: (key is not null and (key = '8')) (type: boolean)
-             Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+             predicate: (key is not null and (not (key = '8'))) (type: boolean)
+             Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
              Map Join Operator
                condition map:
                     Inner Join 0 to 1
@@ -184,23 +181,41 @@ STAGE PLANS:
                  0 key (type: string)
                  1 key (type: string)
                outputColumnNames: _col0, _col6
-               Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: _col0 (type: string), _col6 (type: array<string>)
                  outputColumnNames: _col0, _col1
-                 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                  Union
                    Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                   Select Operator
-                     SELECT * : (no compute)
+                   Lateral View Forward
                      Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                     Lateral View Forward
+                     Select Operator
+                       SELECT * : (no compute)
+                       expressions: _col0 (type: string), _col1 (type: array<string>)
+                       outputColumnNames: org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc, org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc
                        Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                       Select Operator
-                         SELECT * : (no compute)
-                         expressions: _col0 (type: string), _col1 (type: array<string>)
-                         outputColumnNames: org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc, org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc
+                       Lateral View Join Operator
+                         outputColumnNames: _col0, _col1, _col2
+                         Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                         Select Operator
+                           expressions: _col0 (type: string), _col1 (type: array<string>), _col2 (type: string)
+                           outputColumnNames: _col0, _col1, _col2
+                           Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                           File Output Operator
+                             compressed: false
+                             Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                             table:
+                                 input format: org.apache.hadoop.mapred.TextInputFormat
+                                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                       Select Operator
+                         expressions: _col1 (type: array<string>)
+                         outputColumnNames: _col0
+                         Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                         UDTF Operator
                            Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                           function name: explode
                            Lateral View Join Operator
                              outputColumnNames: _col0, _col1, _col2
                              Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
@@ -215,27 +230,6 @@ STAGE PLANS:
                                input format: org.apache.hadoop.mapred.TextInputFormat
                                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                     Select Operator
-                       expressions: _col1 (type: array<string>)
-                       outputColumnNames: _col0
-                       Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                       UDTF Operator
-                         Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                         function name: explode
-                         Lateral View Join Operator
-                           outputColumnNames: _col0, _col1, _col2
-                           Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
-                           Select Operator
-                             expressions: _col0 (type: string), _col1 (type: array<string>), _col2 (type: string)
-                             outputColumnNames: _col0, _col1, _col2
-                             Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
-                             File Output Operator
-                               compressed: false
-                               Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
-                               table:
-                                   input format: org.apache.hadoop.mapred.TextInputFormat
-                                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
       Local Work:
         Map Reduce Local Work
diff --git a/ql/src/test/results/clientpositive/skewjoin_mapjoin7.q.out b/ql/src/test/results/clientpositive/skewjoin_mapjoin7.q.out
index 1b64a24..20a5a31 100644
--- a/ql/src/test/results/clientpositive/skewjoin_mapjoin7.q.out
+++ b/ql/src/test/results/clientpositive/skewjoin_mapjoin7.q.out
@@ -65,23 +65,23 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-16 is a root stage
   Stage-2 depends on stages: Stage-16
-  Stage-3 depends on stages: Stage-2, Stage-8
+  Stage-3 depends on stages: Stage-2, Stage-6
   Stage-17 is a root stage
-  Stage-8 depends on stages: Stage-17
+  Stage-6 depends on stages: Stage-17
   Stage-0 depends on stages: Stage-3
 STAGE PLANS:
   Stage: Stage-16
     Map Reduce Local Work
       Alias -> Map Local Tables:
-        null-subquery2:subq1-subquery2:a
+        null-subquery1:subq1-subquery1:a
           Fetch Operator
            limit: -1
         subquery1:a
           Fetch Operator
            limit: -1
       Alias -> Map Local Operator Tree:
-        null-subquery2:subq1-subquery2:a
+        null-subquery1:subq1-subquery1:a
           TableScan
             alias: a
             Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
@@ -117,7 +117,7 @@ STAGE PLANS:
           alias: b
           Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
           Filter Operator
-            predicate: (key is not null and (not (key = '2'))) (type: boolean)
+            predicate: (key is not null and (key = '2')) (type: boolean)
             Statistics: Num rows: 0 Data size: 0 Basic
stats: NONE Column stats: NONE Map Join Operator condition map: @@ -136,20 +136,17 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe TableScan alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (key = '2')) (type: boolean) + predicate: (key is not null and (not (key = '2'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Map Join Operator condition map: @@ -168,15 +165,12 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -215,14 +209,14 @@ STAGE PLANS: Stage: Stage-17 Map Reduce Local Work Alias -> Map Local Tables: - null-subquery1:subq1-subquery1:a + null-subquery2:subq1-subquery2:a Fetch Operator limit: -1 subquery2:a Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - null-subquery1:subq1-subquery1:a + null-subquery2:subq1-subquery2:a TableScan alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE @@ -251,7 +245,7 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) - Stage: Stage-8 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -277,15 +271,12 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe TableScan alias: b Statistics: Num rows: 0 Data 
size: 30 Basic stats: PARTIAL Column stats: NONE @@ -309,15 +300,12 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work diff --git a/ql/src/test/results/clientpositive/skewjoin_mapjoin9.q.out b/ql/src/test/results/clientpositive/skewjoin_mapjoin9.q.out index d4b8354..4073363 100644 --- a/ql/src/test/results/clientpositive/skewjoin_mapjoin9.q.out +++ b/ql/src/test/results/clientpositive/skewjoin_mapjoin9.q.out @@ -151,7 +151,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (not (key = '2'))) (type: boolean) + predicate: (key is not null and (key = '2')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Map Join Operator condition map: @@ -166,36 +166,33 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} {_col5} {_col6} + 1 {key} {val} + keys: + 0 _col1 (type: string) + 1 val (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} {_col5} {_col6} - 1 {key} {val} - keys: - 0 _col1 (type: string) - 1 val (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TableScan alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (key = '2')) (type: boolean) + predicate: (key is not null and (not (key = '2'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Map Join Operator condition map: @@ -210,31 +207,28 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} {_col5} {_col6} + 1 {key} {val} + keys: + 0 _col1 (type: string) + 1 val (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} {_col5} {_col6} - 1 {key} {val} - keys: - 0 _col1 (type: string) - 1 val (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work diff --git a/ql/src/test/results/clientpositive/skewjoin_noskew.q.out b/ql/src/test/results/clientpositive/skewjoin_noskew.q.out index c92b3ce..e995dc0 100644 --- a/ql/src/test/results/clientpositive/skewjoin_noskew.q.out +++ b/ql/src/test/results/clientpositive/skewjoin_noskew.q.out @@ -19,7 +19,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -29,8 +29,9 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -40,7 +41,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) 
Reduce Operator Tree: Join Operator condition map: @@ -51,16 +51,12 @@ STAGE PLANS: handleSkewJoin: true outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-6 Conditional Operator @@ -96,15 +92,12 @@ STAGE PLANS: 0 reducesinkkey0 (type: string) 1 reducesinkkey0 (type: string) outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work diff --git a/ql/src/test/results/clientpositive/skewjoin_union_remove_1.q.out b/ql/src/test/results/clientpositive/skewjoin_union_remove_1.q.out index ab5da4d..bd6e10f 100644 --- a/ql/src/test/results/clientpositive/skewjoin_union_remove_1.q.out +++ b/ql/src/test/results/clientpositive/skewjoin_union_remove_1.q.out @@ -73,7 +73,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean) + predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -85,7 +85,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean) + predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -121,7 +121,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean) + predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -133,7 +133,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and ((key = 
'2') or (key = '3'))) (type: boolean) + predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -209,7 +209,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (not ((key = '2') or (key = '3'))) (type: boolean) + predicate: ((key = '2') or (key = '3')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -221,7 +221,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (not ((key = '2') or (key = '3'))) (type: boolean) + predicate: ((key = '2') or (key = '3')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -257,7 +257,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((key = '2') or (key = '3')) (type: boolean) + predicate: (not ((key = '2') or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -269,7 +269,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((key = '2') or (key = '3')) (type: boolean) + predicate: (not ((key = '2') or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -353,7 +353,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean) + predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -365,7 +365,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean) + predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -412,7 +412,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean) + predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -424,7 +424,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean) + predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: 
string) @@ -507,7 +507,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (not ((key = '2') or (key = '3'))) (type: boolean) + predicate: ((key = '2') or (key = '3')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -519,7 +519,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (not ((key = '2') or (key = '3'))) (type: boolean) + predicate: ((key = '2') or (key = '3')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -566,7 +566,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((key = '2') or (key = '3')) (type: boolean) + predicate: (not ((key = '2') or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -578,7 +578,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((key = '2') or (key = '3')) (type: boolean) + predicate: (not ((key = '2') or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) diff --git a/ql/src/test/results/clientpositive/skewjoin_union_remove_2.q.out b/ql/src/test/results/clientpositive/skewjoin_union_remove_2.q.out index e87d22b..f2a4141 100644 --- a/ql/src/test/results/clientpositive/skewjoin_union_remove_2.q.out +++ b/ql/src/test/results/clientpositive/skewjoin_union_remove_2.q.out @@ -82,10 +82,10 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) + predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -94,10 +94,10 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val (type: string) TableScan - alias: a + alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) + predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -106,10 +106,10 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val (type: string) TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: c + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) 
+ predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -147,7 +147,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) + predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -159,7 +159,7 @@ STAGE PLANS: alias: c Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) + predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -171,7 +171,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) + predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) diff --git a/ql/src/test/results/clientpositive/skewjoinopt1.q.out b/ql/src/test/results/clientpositive/skewjoinopt1.q.out index 06cd471..d81f582 100644 --- a/ql/src/test/results/clientpositive/skewjoinopt1.q.out +++ b/ql/src/test/results/clientpositive/skewjoinopt1.q.out @@ -60,7 +60,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean) + predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -72,7 +72,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean) + predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -106,29 +106,23 @@ STAGE PLANS: TableScan Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TableScan Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-4 Map Reduce @@ -137,7 +131,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean) + predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -149,7 +143,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean) + predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -225,7 +219,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (not ((key = '2') or (key = '3'))) (type: boolean) + predicate: ((key = '2') or (key = '3')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -237,7 +231,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (not ((key = '2') or (key = '3'))) (type: boolean) + predicate: ((key = '2') or (key = '3')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -271,29 +265,23 @@ STAGE PLANS: TableScan Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TableScan Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column 
stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-4 Map Reduce @@ -302,7 +290,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((key = '2') or (key = '3')) (type: boolean) + predicate: (not ((key = '2') or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -314,7 +302,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((key = '2') or (key = '3')) (type: boolean) + predicate: (not ((key = '2') or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -392,7 +380,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean) + predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -403,7 +391,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean) + predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -418,14 +406,12 @@ STAGE PLANS: 0 1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -433,33 +419,27 @@ STAGE PLANS: TableScan Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) TableScan Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -485,7 +465,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean) + predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -496,7 +476,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean) + predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -511,14 +491,12 @@ STAGE PLANS: 0 1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator @@ -557,7 +535,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (not ((key = '2') or (key = '3'))) (type: boolean) + predicate: ((key = '2') or (key = '3')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -568,7 +546,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (not ((key = '2') or (key = '3'))) (type: boolean) + predicate: ((key = '2') or (key = '3')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -583,14 +561,12 @@ STAGE PLANS: 0 1 Statistics: Num rows: 0 Data size: 
0 Basic stats: NONE Column stats: NONE - Select Operator - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -598,33 +574,27 @@ STAGE PLANS: TableScan Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) TableScan Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -650,7 +620,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((key = '2') or (key = '3')) (type: boolean) + predicate: (not ((key = '2') or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -661,7 +631,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((key = '2') or (key = '3')) (type: boolean) + predicate: (not ((key = '2') or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -676,14 +646,12 @@ STAGE PLANS: 0 1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/skewjoinopt10.q.out b/ql/src/test/results/clientpositive/skewjoinopt10.q.out index 01a2cd2..aca889e 100644 --- a/ql/src/test/results/clientpositive/skewjoinopt10.q.out +++ b/ql/src/test/results/clientpositive/skewjoinopt10.q.out @@ -50,8 +50,8 @@ select * from (select a.key as key, b.value as array_val from T1 a join array_va POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-3 - Stage-3 is a root stage + Stage-2 depends on stages: Stage-1, Stage-4 + Stage-4 is a root stage Stage-0 depends on stages: Stage-2 STAGE PLANS: @@ -62,7 +62,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (not (key = '8'))) (type: boolean) + predicate: (key is not null and (key = '8')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -73,13 +73,13 @@ STAGE PLANS: alias: b Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and (not (key = '8'))) (type: boolean) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + predicate: (key is not null and (key = '8')) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: array) Reduce Operator Tree: Join Operator @@ -89,11 +89,11 @@ STAGE PLANS: 0 {KEY.reducesinkkey0} 1 {VALUE._col0} outputColumnNames: _col0, _col6 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col6 (type: array) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -107,16 +107,34 @@ STAGE PLANS: TableScan Union Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - SELECT * : (no compute) + Lateral View Forward Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Lateral View Forward + Select Operator + SELECT * : (no compute) + expressions: _col0 (type: string), _col1 (type: array) + outputColumnNames: org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc, org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - SELECT * : (no compute) - expressions: _col0 (type: string), _col1 (type: array) - outputColumnNames: org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc, 
org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc + Lateral View Join Operator + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: array), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col1 (type: array) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + UDTF Operator Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + function name: explode Lateral View Join Operator outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE @@ -131,40 +149,37 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Select Operator - expressions: _col1 (type: array) - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - UDTF Operator - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - function name: explode - Lateral View Join Operator - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: array), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TableScan Union Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - SELECT * : (no compute) + Lateral View Forward Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Lateral View Forward + Select Operator + SELECT * : (no compute) + expressions: _col0 (type: string), _col1 (type: array) + outputColumnNames: org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc, org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - SELECT * : (no compute) - expressions: _col0 (type: string), _col1 (type: array) - outputColumnNames: org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc, org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc + Lateral View Join Operator + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: array), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator 
+ compressed: false + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col1 (type: array) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + UDTF Operator Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + function name: explode Lateral View Join Operator outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE @@ -179,54 +194,33 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Select Operator - expressions: _col1 (type: array) - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - UDTF Operator - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - function name: explode - Lateral View Join Operator - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: array), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-3 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: b + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and (key = '8')) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + predicate: (key is not null and (not (key = '8'))) (type: boolean) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: array) TableScan - alias: b - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (key = '8')) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + predicate: (key is not null and (not (key = '8'))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: array) + 
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -235,11 +229,11 @@ STAGE PLANS: 0 {KEY.reducesinkkey0} 1 {VALUE._col0} outputColumnNames: _col0, _col6 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col6 (type: array) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: diff --git a/ql/src/test/results/clientpositive/skewjoinopt11.q.out b/ql/src/test/results/clientpositive/skewjoinopt11.q.out index 7b431dd..910c1cc 100644 --- a/ql/src/test/results/clientpositive/skewjoinopt11.q.out +++ b/ql/src/test/results/clientpositive/skewjoinopt11.q.out @@ -60,12 +60,12 @@ select * from POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-5 - Stage-3 depends on stages: Stage-2, Stage-8 + Stage-2 depends on stages: Stage-1, Stage-8 + Stage-3 depends on stages: Stage-2, Stage-6 Stage-5 is a root stage - Stage-7 is a root stage - Stage-8 depends on stages: Stage-7, Stage-9 - Stage-9 is a root stage + Stage-6 depends on stages: Stage-5, Stage-10 + Stage-8 is a root stage + Stage-10 is a root stage Stage-0 depends on stages: Stage-3 STAGE PLANS: @@ -76,7 +76,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (not (key = '2'))) (type: boolean) + predicate: (key is not null and (key = '2')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -88,7 +88,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (not (key = '2'))) (type: boolean) + predicate: (key is not null and (key = '2')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -122,27 +122,21 @@ STAGE PLANS: TableScan Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe TableScan Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Map Reduce @@ -223,14 +217,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-7 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan - alias: a + Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TableScan + Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (key = '2')) (type: boolean) + predicate: (key is not null and (not (key = '2'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -239,10 +255,10 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val (type: string) TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (key = '2')) (type: boolean) + predicate: (key is not null and (not (key = '2'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -270,35 +286,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-8 - Map Reduce - Map Operator Tree: - TableScan - Union - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TableScan - Union - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-9 + Stage: Stage-10 Map Reduce Map Operator Tree: TableScan diff --git 
a/ql/src/test/results/clientpositive/skewjoinopt12.q.out b/ql/src/test/results/clientpositive/skewjoinopt12.q.out index 6d6a526..5c0daf6 100644 --- a/ql/src/test/results/clientpositive/skewjoinopt12.q.out +++ b/ql/src/test/results/clientpositive/skewjoinopt12.q.out @@ -62,7 +62,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((key is not null and val is not null) and (not ((((key = '2') and (val = '12')) or ((key = '8') and (val = '18'))) or ((key = '3') and (val = '13'))))) (type: boolean) + predicate: ((key is not null and val is not null) and ((((key = '2') and (val = '12')) or ((key = '8') and (val = '18'))) or ((key = '3') and (val = '13')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string), val (type: string) @@ -73,7 +73,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((key is not null and val is not null) and (not ((((key = '2') and (val = '12')) or ((key = '8') and (val = '18'))) or ((key = '3') and (val = '13'))))) (type: boolean) + predicate: ((key is not null and val is not null) and ((((key = '2') and (val = '12')) or ((key = '8') and (val = '18'))) or ((key = '3') and (val = '13')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string), val (type: string) @@ -106,29 +106,23 @@ STAGE PLANS: TableScan Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TableScan Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-4 Map Reduce @@ -137,7 +131,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((key is not null and val is not null) and ((((key = '2') and (val = '12')) or ((key = '8') and (val = '18'))) or ((key = '3') and (val = '13')))) (type: boolean) + predicate: ((key is not null and val is not null) and (not ((((key = '2') 
and (val = '12')) or ((key = '8') and (val = '18'))) or ((key = '3') and (val = '13'))))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string), val (type: string) @@ -148,7 +142,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((key is not null and val is not null) and ((((key = '2') and (val = '12')) or ((key = '8') and (val = '18'))) or ((key = '3') and (val = '13')))) (type: boolean) + predicate: ((key is not null and val is not null) and (not ((((key = '2') and (val = '12')) or ((key = '8') and (val = '18'))) or ((key = '3') and (val = '13'))))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string), val (type: string) diff --git a/ql/src/test/results/clientpositive/skewjoinopt13.q.out b/ql/src/test/results/clientpositive/skewjoinopt13.q.out index 79f730c..edd59c6 100644 --- a/ql/src/test/results/clientpositive/skewjoinopt13.q.out +++ b/ql/src/test/results/clientpositive/skewjoinopt13.q.out @@ -84,10 +84,10 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) + predicate: (key is not null and val is not null) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -96,10 +96,10 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val (type: string) TableScan - alias: a + alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and val is not null) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) diff --git a/ql/src/test/results/clientpositive/skewjoinopt14.q.out b/ql/src/test/results/clientpositive/skewjoinopt14.q.out index 206d6ce..87097bb 100644 --- a/ql/src/test/results/clientpositive/skewjoinopt14.q.out +++ b/ql/src/test/results/clientpositive/skewjoinopt14.q.out @@ -80,8 +80,8 @@ join T3 c on a.val = c.val POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-4 - Stage-4 is a root stage + Stage-2 depends on stages: Stage-1, Stage-5 + Stage-5 is a root stage Stage-0 depends on stages: Stage-2 STAGE PLANS: @@ -92,7 +92,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((key is not null and val is not null) and (not (key = '2'))) (type: boolean) + predicate: ((key is not null and val is not null) and (key = '2')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -104,7 +104,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (not (key = '2'))) (type: boolean) + predicate: (key is not null and (key = '2')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: 
string) @@ -134,27 +134,12 @@ STAGE PLANS: TableScan Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col0 (type: string), _col5 (type: string), _col6 (type: string) - TableScan - Union - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col0 (type: string), _col5 (type: string), _col6 (type: string) + value expressions: _col0 (type: string), _col5 (type: string), _col6 (type: string) TableScan alias: c Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE @@ -167,6 +152,15 @@ STAGE PLANS: Map-reduce partition columns: val (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: key (type: string) + TableScan + Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col5 (type: string), _col6 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -188,14 +182,14 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (key = '2')) (type: boolean) + predicate: (key is not null and (not (key = '2'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -207,7 +201,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((key is not null and val is not null) and (key = '2')) (type: boolean) + predicate: ((key is not null and val is not null) and (not (key = '2'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) diff --git a/ql/src/test/results/clientpositive/skewjoinopt15.q.out b/ql/src/test/results/clientpositive/skewjoinopt15.q.out index edf2dee..def498c 100644 --- a/ql/src/test/results/clientpositive/skewjoinopt15.q.out +++ b/ql/src/test/results/clientpositive/skewjoinopt15.q.out @@ -100,25 +100,25 @@ STAGE PLANS: alias: a Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and (not ((key = 2) or (key = 3)))) 
(type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + predicate: (key is not null and ((key = 2) or (key = 3))) (type: boolean) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: val (type: string) TableScan alias: b Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and (not ((key = 2) or (key = 3)))) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + predicate: (key is not null and ((key = 2) or (key = 3))) (type: boolean) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: val (type: string) Reduce Operator Tree: Join Operator @@ -128,11 +128,11 @@ STAGE PLANS: 0 {KEY.reducesinkkey0} {VALUE._col0} 1 {KEY.reducesinkkey0} {VALUE._col0} outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -146,29 +146,23 @@ STAGE PLANS: TableScan Union Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - SELECT * : (no compute) + File Output Operator + compressed: false Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TableScan Union Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - SELECT * : (no compute) + File Output Operator + compressed: false Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-4 Map Reduce @@ -177,25 +171,25 @@ STAGE PLANS: alias: b Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and ((key = 2) or (key = 3))) (type: boolean) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + predicate: (key is not null and (not ((key = 2) or (key = 3)))) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE value expressions: val (type: string) TableScan alias: a Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and ((key = 2) or (key = 3))) (type: boolean) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + predicate: (key is not null and (not ((key = 2) or (key = 3)))) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE value expressions: val (type: string) Reduce Operator Tree: Join Operator @@ -205,11 +199,11 @@ STAGE PLANS: 0 {KEY.reducesinkkey0} {VALUE._col0} 1 {KEY.reducesinkkey0} {VALUE._col0} outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -265,25 +259,25 @@ STAGE PLANS: alias: a Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (not ((key = 2) or (key = 3))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + predicate: ((key = 2) or (key = 3)) (type: boolean) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: val (type: string) TableScan alias: b Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (not ((key = 2) or (key = 3))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + predicate: ((key = 2) or (key = 3)) (type: boolean) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + 
Map-reduce partition columns: key (type: int) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: val (type: string) Reduce Operator Tree: Join Operator @@ -293,11 +287,11 @@ STAGE PLANS: 0 {KEY.reducesinkkey0} {VALUE._col0} 1 {KEY.reducesinkkey0} {VALUE._col0} outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -311,29 +305,23 @@ STAGE PLANS: TableScan Union Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE - Select Operator - SELECT * : (no compute) + File Output Operator + compressed: false Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TableScan Union Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE - Select Operator - SELECT * : (no compute) + File Output Operator + compressed: false Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-4 Map Reduce @@ -342,25 +330,25 @@ STAGE PLANS: alias: b Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key = 2) or (key = 3)) (type: boolean) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + predicate: (not ((key = 2) or (key = 3))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val (type: string) TableScan alias: a Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key = 2) or (key = 3)) (type: boolean) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: 
NONE + predicate: (not ((key = 2) or (key = 3))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val (type: string) Reduce Operator Tree: Join Operator @@ -370,11 +358,11 @@ STAGE PLANS: 0 {KEY.reducesinkkey0} {VALUE._col0} 1 {KEY.reducesinkkey0} {VALUE._col0} outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false table: @@ -432,24 +420,24 @@ STAGE PLANS: alias: a Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and (not ((key = 2) or (key = 3)))) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + predicate: (key is not null and ((key = 2) or (key = 3))) (type: boolean) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE TableScan alias: b Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and (not ((key = 2) or (key = 3)))) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + predicate: (key is not null and ((key = 2) or (key = 3))) (type: boolean) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -457,15 +445,13 @@ STAGE PLANS: condition expressions: 0 1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -473,33 +459,27 @@ STAGE PLANS: 
TableScan Union Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - SELECT * : (no compute) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) TableScan Union Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - SELECT * : (no compute) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -525,24 +505,24 @@ STAGE PLANS: alias: b Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and ((key = 2) or (key = 3))) (type: boolean) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + predicate: (key is not null and (not ((key = 2) or (key = 3)))) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE TableScan alias: a Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and ((key = 2) or (key = 3))) (type: boolean) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + predicate: (key is not null and (not ((key = 2) or (key = 3)))) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -550,15 +530,13 @@ STAGE PLANS: condition expressions: 0 1 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator @@ -597,24 +575,24 @@ STAGE PLANS: alias: a Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (not ((key = 2) or (key = 3))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + predicate: ((key = 2) or (key = 3)) (type: boolean) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE TableScan alias: b Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (not ((key = 2) or (key = 3))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + predicate: ((key = 2) or (key = 3)) (type: boolean) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -622,15 +600,13 @@ STAGE PLANS: condition expressions: 0 1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -638,33 +614,27 @@ STAGE PLANS: TableScan Union Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE - Select Operator - SELECT * : (no compute) - Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 
(type: bigint) TableScan Union Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE - Select Operator - SELECT * : (no compute) - Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -690,24 +660,24 @@ STAGE PLANS: alias: b Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key = 2) or (key = 3)) (type: boolean) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + predicate: (not ((key = 2) or (key = 3))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE TableScan alias: a Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key = 2) or (key = 3)) (type: boolean) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + predicate: (not ((key = 2) or (key = 3))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -715,15 +685,13 @@ STAGE PLANS: condition expressions: 0 1 - Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/skewjoinopt16.q.out b/ql/src/test/results/clientpositive/skewjoinopt16.q.out index 16a18ba..01589fe 100644 --- a/ql/src/test/results/clientpositive/skewjoinopt16.q.out +++ b/ql/src/test/results/clientpositive/skewjoinopt16.q.out @@ -62,7 +62,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - 
predicate: ((key is not null and val is not null) and (not (((key = '2') and (val = '12')) or (key = '3')))) (type: boolean) + predicate: ((key is not null and val is not null) and (((key = '2') and (val = '12')) or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string), val (type: string) @@ -73,7 +73,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((key is not null and val is not null) and (not (((key = '2') and (val = '12')) or (key = '3')))) (type: boolean) + predicate: ((key is not null and val is not null) and (((key = '2') and (val = '12')) or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string), val (type: string) @@ -106,29 +106,23 @@ STAGE PLANS: TableScan Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TableScan Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-4 Map Reduce @@ -137,7 +131,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((key is not null and val is not null) and (((key = '2') and (val = '12')) or (key = '3'))) (type: boolean) + predicate: ((key is not null and val is not null) and (not (((key = '2') and (val = '12')) or (key = '3')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string), val (type: string) @@ -148,7 +142,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((key is not null and val is not null) and (((key = '2') and (val = '12')) or (key = '3'))) (type: boolean) + predicate: ((key is not null and val is not null) and (not (((key = '2') and (val = '12')) or (key = '3')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output 
Operator key expressions: key (type: string), val (type: string) diff --git a/ql/src/test/results/clientpositive/skewjoinopt17.q.out b/ql/src/test/results/clientpositive/skewjoinopt17.q.out index 04c6960..058120a 100644 --- a/ql/src/test/results/clientpositive/skewjoinopt17.q.out +++ b/ql/src/test/results/clientpositive/skewjoinopt17.q.out @@ -66,7 +66,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (not (key = '2'))) (type: boolean) + predicate: (key is not null and (key = '2')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -78,7 +78,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (not (key = '2'))) (type: boolean) + predicate: (key is not null and (key = '2')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -112,29 +112,23 @@ STAGE PLANS: TableScan Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TableScan Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-4 Map Reduce @@ -143,7 +137,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (key = '2')) (type: boolean) + predicate: (key is not null and (not (key = '2'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -155,7 +149,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (key = '2')) (type: boolean) + predicate: (key is not null and (not (key = '2'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ 
-287,7 +281,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((key is not null and val is not null) and (not (((key = '2') and (val = '12')) or (key = '2')))) (type: boolean) + predicate: ((key is not null and val is not null) and (((key = '2') and (val = '12')) or (key = '2'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string), val (type: string) @@ -298,7 +292,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((key is not null and val is not null) and (not (((key = '2') and (val = '12')) or (key = '2')))) (type: boolean) + predicate: ((key is not null and val is not null) and (((key = '2') and (val = '12')) or (key = '2'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string), val (type: string) @@ -331,29 +325,23 @@ STAGE PLANS: TableScan Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TableScan Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-4 Map Reduce @@ -362,7 +350,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((key is not null and val is not null) and (((key = '2') and (val = '12')) or (key = '2'))) (type: boolean) + predicate: ((key is not null and val is not null) and (not (((key = '2') and (val = '12')) or (key = '2')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string), val (type: string) @@ -373,7 +361,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((key is not null and val is not null) and (((key = '2') and (val = '12')) or (key = '2'))) (type: boolean) + predicate: ((key is not null and val is not null) and (not (((key = '2') and 
(val = '12')) or (key = '2')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string), val (type: string) diff --git a/ql/src/test/results/clientpositive/skewjoinopt18.q.out b/ql/src/test/results/clientpositive/skewjoinopt18.q.out index 87de96b..24d7d78 100644 --- a/ql/src/test/results/clientpositive/skewjoinopt18.q.out +++ b/ql/src/test/results/clientpositive/skewjoinopt18.q.out @@ -83,29 +83,29 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: a + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: UDFToDouble(key) is not null (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: UDFToDouble(key) (type: double) sort order: + Map-reduce partition columns: UDFToDouble(key) (type: double) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: key (type: string), val (type: string) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), val (type: string) TableScan - alias: a - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: UDFToDouble(key) is not null (type: boolean) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: UDFToDouble(key) (type: double) sort order: + Map-reduce partition columns: UDFToDouble(key) (type: double) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: int), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) Reduce Operator Tree: Join Operator condition map: diff --git a/ql/src/test/results/clientpositive/skewjoinopt19.q.out b/ql/src/test/results/clientpositive/skewjoinopt19.q.out index 243304b..57c10dc 100644 --- a/ql/src/test/results/clientpositive/skewjoinopt19.q.out +++ b/ql/src/test/results/clientpositive/skewjoinopt19.q.out @@ -64,7 +64,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (not (key = '2'))) (type: boolean) + predicate: (key is not null and (key = '2')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -76,7 +76,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (not (key = '2'))) (type: boolean) + predicate: (key is not null and (key = '2')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -110,29 +110,23 @@ STAGE PLANS: TableScan Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) + File Output Operator + 
compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TableScan Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-4 Map Reduce @@ -141,7 +135,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (key = '2')) (type: boolean) + predicate: (key is not null and (not (key = '2'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -153,7 +147,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (key = '2')) (type: boolean) + predicate: (key is not null and (not (key = '2'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) diff --git a/ql/src/test/results/clientpositive/skewjoinopt2.q.out b/ql/src/test/results/clientpositive/skewjoinopt2.q.out index bd5f85e..b4f2abe 100644 --- a/ql/src/test/results/clientpositive/skewjoinopt2.q.out +++ b/ql/src/test/results/clientpositive/skewjoinopt2.q.out @@ -70,7 +70,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((key is not null and val is not null) and (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')))) (type: boolean) + predicate: ((key is not null and val is not null) and ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string), val (type: string) @@ -81,7 +81,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((key is not null and val is not null) and (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')))) (type: boolean) + predicate: ((key is not null and val is not null) and ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output 
Operator key expressions: key (type: string), val (type: string) @@ -114,29 +114,23 @@ STAGE PLANS: TableScan Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TableScan Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-4 Map Reduce @@ -145,7 +139,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((key is not null and val is not null) and ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) + predicate: ((key is not null and val is not null) and (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string), val (type: string) @@ -156,7 +150,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((key is not null and val is not null) and ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) + predicate: ((key is not null and val is not null) and (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string), val (type: string) @@ -228,7 +222,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) + predicate: ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string), val (type: string) @@ -239,7 +233,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) + predicate: ((((key = '2') or 
(key = '7')) or (key = '3')) or (key = '8')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string), val (type: string) @@ -272,29 +266,23 @@ STAGE PLANS: TableScan Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TableScan Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-4 Map Reduce @@ -303,7 +291,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')) (type: boolean) + predicate: (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string), val (type: string) @@ -314,7 +302,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')) (type: boolean) + predicate: (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string), val (type: string) @@ -390,7 +378,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((key is not null and val is not null) and (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')))) (type: boolean) + predicate: ((key is not null and val is not null) and ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string), val (type: string) @@ -401,7 +389,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((key is not null and val is not null) and (not ((((key = 
'2') or (key = '7')) or (key = '3')) or (key = '8')))) (type: boolean) + predicate: ((key is not null and val is not null) and ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string), val (type: string) @@ -417,16 +405,12 @@ STAGE PLANS: 1 outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -434,39 +418,33 @@ STAGE PLANS: TableScan Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) TableScan Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -493,7 +471,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((key is not null and val is not null) and ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) + predicate: ((key is not null and val is not null) and (not ((((key = '2') 
or (key = '7')) or (key = '3')) or (key = '8')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string), val (type: string) @@ -504,7 +482,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((key is not null and val is not null) and ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) + predicate: ((key is not null and val is not null) and (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string), val (type: string) @@ -520,16 +498,12 @@ STAGE PLANS: 1 outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator @@ -569,7 +543,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) + predicate: ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string), val (type: string) @@ -580,7 +554,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) + predicate: ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string), val (type: string) @@ -596,16 +570,12 @@ STAGE PLANS: 1 outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -613,39 +583,33 @@ STAGE PLANS: TableScan Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select 
Operator - SELECT * : (no compute) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) TableScan Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -672,7 +636,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')) (type: boolean) + predicate: (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string), val (type: string) @@ -683,7 +647,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')) (type: boolean) + predicate: (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string), val (type: string) @@ -699,16 +663,12 @@ STAGE PLANS: 1 outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/skewjoinopt20.q.out b/ql/src/test/results/clientpositive/skewjoinopt20.q.out index aef3e82..02f5f3c 100644 --- a/ql/src/test/results/clientpositive/skewjoinopt20.q.out +++ b/ql/src/test/results/clientpositive/skewjoinopt20.q.out @@ -64,7 +64,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (not (key = '2'))) (type: boolean) + predicate: (key is not null and (key = '2')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -76,7 +76,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (not (key = '2'))) (type: boolean) + predicate: (key is not null and (key = '2')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -110,29 +110,23 @@ STAGE PLANS: TableScan Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TableScan Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-4 Map Reduce @@ -141,7 +135,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (key = '2')) (type: boolean) + predicate: (key is not null and (not (key = '2'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -153,7 +147,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (key = '2')) (type: boolean) + predicate: (key is not null and (not (key = '2'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic 
stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) diff --git a/ql/src/test/results/clientpositive/skewjoinopt3.q.out b/ql/src/test/results/clientpositive/skewjoinopt3.q.out index d36f9db..ce8e02e 100644 --- a/ql/src/test/results/clientpositive/skewjoinopt3.q.out +++ b/ql/src/test/results/clientpositive/skewjoinopt3.q.out @@ -64,7 +64,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) + predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -76,7 +76,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) + predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -110,29 +110,23 @@ STAGE PLANS: TableScan Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TableScan Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-4 Map Reduce @@ -141,7 +135,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) + predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -153,7 +147,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (((key = 
'2') or (key = '8')) or (key = '3'))) (type: boolean) + predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -229,7 +223,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (not (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) + predicate: (((key = '2') or (key = '8')) or (key = '3')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -241,7 +235,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (not (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) + predicate: (((key = '2') or (key = '8')) or (key = '3')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -275,29 +269,23 @@ STAGE PLANS: TableScan Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TableScan Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-4 Map Reduce @@ -306,7 +294,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (((key = '2') or (key = '8')) or (key = '3')) (type: boolean) + predicate: (not (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -318,7 +306,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (((key = '2') or (key = '8')) or (key = '3')) (type: boolean) + predicate: (not (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output 
Operator key expressions: key (type: string) diff --git a/ql/src/test/results/clientpositive/skewjoinopt4.q.out b/ql/src/test/results/clientpositive/skewjoinopt4.q.out index 8298039..159f91a 100644 --- a/ql/src/test/results/clientpositive/skewjoinopt4.q.out +++ b/ql/src/test/results/clientpositive/skewjoinopt4.q.out @@ -60,7 +60,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (not (key = '2'))) (type: boolean) + predicate: (key is not null and (key = '2')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -72,7 +72,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (not (key = '2'))) (type: boolean) + predicate: (key is not null and (key = '2')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -106,29 +106,23 @@ STAGE PLANS: TableScan Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TableScan Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-4 Map Reduce @@ -137,7 +131,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (key = '2')) (type: boolean) + predicate: (key is not null and (not (key = '2'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -149,7 +143,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (key = '2')) (type: boolean) + predicate: (key is not null and (not (key = '2'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -223,7 +217,7 @@ STAGE 
PLANS: alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (not (key = '2'))) (type: boolean) + predicate: (key is not null and (key = '2')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -235,7 +229,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (not (key = '2'))) (type: boolean) + predicate: (key is not null and (key = '2')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -269,29 +263,23 @@ STAGE PLANS: TableScan Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TableScan Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-4 Map Reduce @@ -300,7 +288,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (key = '2')) (type: boolean) + predicate: (key is not null and (not (key = '2'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -312,7 +300,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (key = '2')) (type: boolean) + predicate: (key is not null and (not (key = '2'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) diff --git a/ql/src/test/results/clientpositive/skewjoinopt5.q.out b/ql/src/test/results/clientpositive/skewjoinopt5.q.out index 165e6c6..1358cab 100644 --- a/ql/src/test/results/clientpositive/skewjoinopt5.q.out +++ b/ql/src/test/results/clientpositive/skewjoinopt5.q.out @@ -62,7 +62,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: 
PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean) + predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -74,7 +74,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean) + predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -108,29 +108,23 @@ STAGE PLANS: TableScan Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TableScan Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-4 Map Reduce @@ -139,7 +133,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean) + predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -151,7 +145,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean) + predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) diff --git a/ql/src/test/results/clientpositive/skewjoinopt6.q.out b/ql/src/test/results/clientpositive/skewjoinopt6.q.out index 790bc61..233e584 100644 --- a/ql/src/test/results/clientpositive/skewjoinopt6.q.out +++ b/ql/src/test/results/clientpositive/skewjoinopt6.q.out @@ -64,7 +64,7 @@ STAGE 
PLANS: alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) + predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -76,7 +76,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) + predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -110,29 +110,23 @@ STAGE PLANS: TableScan Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TableScan Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-4 Map Reduce @@ -141,7 +135,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) + predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -153,7 +147,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) + predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) diff --git a/ql/src/test/results/clientpositive/skewjoinopt7.q.out 
b/ql/src/test/results/clientpositive/skewjoinopt7.q.out index 3a13ffa..8bebf7a 100644 --- a/ql/src/test/results/clientpositive/skewjoinopt7.q.out +++ b/ql/src/test/results/clientpositive/skewjoinopt7.q.out @@ -77,10 +77,10 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) + predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -89,10 +89,10 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val (type: string) TableScan - alias: a + alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) + predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -101,10 +101,10 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val (type: string) TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: c + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) + predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -140,29 +140,23 @@ STAGE PLANS: TableScan Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TableScan Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + 
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-5 Map Reduce @@ -171,7 +165,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) + predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -183,7 +177,7 @@ STAGE PLANS: alias: c Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) + predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -195,7 +189,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) + predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) diff --git a/ql/src/test/results/clientpositive/skewjoinopt8.q.out b/ql/src/test/results/clientpositive/skewjoinopt8.q.out index e960354..d3a7461 100644 --- a/ql/src/test/results/clientpositive/skewjoinopt8.q.out +++ b/ql/src/test/results/clientpositive/skewjoinopt8.q.out @@ -75,10 +75,10 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (not ((key = '3') or (key = '8')))) (type: boolean) + predicate: (key is not null and ((key = '3') or (key = '8'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -87,10 +87,10 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val (type: string) TableScan - alias: a + alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (not ((key = '3') or (key = '8')))) (type: boolean) + predicate: (key is not null and ((key = '3') or (key = '8'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -99,10 +99,10 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val (type: string) TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: c + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (not ((key = '3') or (key = '8')))) (type: boolean) + predicate: (key is not null and ((key = '3') or (key = '8'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE 
Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -138,29 +138,23 @@ STAGE PLANS: TableScan Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TableScan Union Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - SELECT * : (no compute) + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-5 Map Reduce @@ -169,7 +163,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and ((key = '3') or (key = '8'))) (type: boolean) + predicate: (key is not null and (not ((key = '3') or (key = '8')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -181,7 +175,7 @@ STAGE PLANS: alias: c Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and ((key = '3') or (key = '8'))) (type: boolean) + predicate: (key is not null and (not ((key = '3') or (key = '8')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -193,7 +187,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and ((key = '3') or (key = '8'))) (type: boolean) + predicate: (key is not null and (not ((key = '3') or (key = '8')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) diff --git a/ql/src/test/results/clientpositive/skewjoinopt9.q.out b/ql/src/test/results/clientpositive/skewjoinopt9.q.out index 58dad77..25e11e9 100644 --- a/ql/src/test/results/clientpositive/skewjoinopt9.q.out +++ b/ql/src/test/results/clientpositive/skewjoinopt9.q.out @@ -65,18 +65,6 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column 
stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - TableScan alias: t1 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator @@ -112,6 +100,18 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col1 (type: string) + TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) Reduce Operator Tree: Join Operator condition map: @@ -208,22 +208,18 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Group By Operator + aggregations: count(1) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -231,16 +227,12 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce diff --git a/ql/src/test/results/clientpositive/smb_mapjoin9.q.out b/ql/src/test/results/clientpositive/smb_mapjoin9.q.out index e566384..c40e14d 100644 --- a/ql/src/test/results/clientpositive/smb_mapjoin9.q.out +++ b/ql/src/test/results/clientpositive/smb_mapjoin9.q.out @@ -290,14 +290,14 @@ ON a.key = b.key WHERE a.ds = '2010-10-15' and b.ds='2010-10-15' and b.key IS N POSTHOOK: type: CREATETABLE_AS_SELECT STAGE 
DEPENDENCIES: Stage-1 is a root stage - Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-9 depends on stages: Stage-0 - Stage-3 depends on stages: Stage-9 + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-9 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-9 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-1 @@ -330,10 +330,10 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.smb_mapjoin9_results - Stage: Stage-8 + Stage: Stage-7 Conditional Operator - Stage: Stage-5 + Stage: Stage-4 Move Operator files: hdfs directory: true @@ -354,10 +354,10 @@ STAGE PLANS: serde name: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.smb_mapjoin9_results - Stage: Stage-3 + Stage: Stage-2 Stats-Aggr Operator - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -369,7 +369,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.smb_mapjoin9_results - Stage: Stage-6 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -381,7 +381,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.smb_mapjoin9_results - Stage: Stage-7 + Stage: Stage-6 Move Operator files: hdfs directory: true diff --git a/ql/src/test/results/clientpositive/smb_mapjoin_11.q.out b/ql/src/test/results/clientpositive/smb_mapjoin_11.q.out index 7c1ff4a..2088ed6 100644 --- a/ql/src/test/results/clientpositive/smb_mapjoin_11.q.out +++ b/ql/src/test/results/clientpositive/smb_mapjoin_11.q.out @@ -122,7 +122,7 @@ TOK_QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 @@ -260,7 +260,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 - Stage: Stage-3 + Stage: Stage-2 Stats-Aggr Operator #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/smb_mapjoin_12.q.out b/ql/src/test/results/clientpositive/smb_mapjoin_12.q.out index 12dd610..24e6bdb 100644 --- a/ql/src/test/results/clientpositive/smb_mapjoin_12.q.out +++ b/ql/src/test/results/clientpositive/smb_mapjoin_12.q.out @@ -134,7 +134,7 @@ TOK_QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 @@ -274,7 +274,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 - Stage: Stage-3 + Stage: Stage-2 Stats-Aggr Operator #### A masked pattern was here #### @@ -397,7 +397,7 @@ TOK_QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 @@ -537,7 +537,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 - Stage: Stage-3 + Stage: Stage-2 Stats-Aggr Operator #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/smb_mapjoin_14.q.out b/ql/src/test/results/clientpositive/smb_mapjoin_14.q.out index 48fd95d..56a8f5b 100644 --- 
a/ql/src/test/results/clientpositive/smb_mapjoin_14.q.out +++ b/ql/src/test/results/clientpositive/smb_mapjoin_14.q.out @@ -73,14 +73,13 @@ STAGE PLANS: keys: 0 key (type: int) 1 key (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -162,34 +161,28 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Group By Operator - aggregations: count() - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: bigint) + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: int), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Map Reduce @@ -266,8 +259,8 @@ select count(*) from POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -289,19 +282,16 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Group By Operator - aggregations: count() - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: bigint) + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -320,7 +310,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -418,14 +408,13 @@ STAGE PLANS: keys: 
0 _col0 (type: int) 1 _col0 (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -517,14 +506,13 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 key (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -644,14 +632,13 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -761,14 +748,13 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -882,17 +868,15 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -975,14 +959,13 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 key (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1060,14 +1043,13 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 key (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - 
sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1158,14 +1140,13 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) 2 _col0 (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1265,14 +1246,13 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 key (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/smb_mapjoin_16.q.out b/ql/src/test/results/clientpositive/smb_mapjoin_16.q.out index 15568dd..a68dd5d 100644 --- a/ql/src/test/results/clientpositive/smb_mapjoin_16.q.out +++ b/ql/src/test/results/clientpositive/smb_mapjoin_16.q.out @@ -65,14 +65,13 @@ STAGE PLANS: keys: 0 key (type: int) 1 key (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/smb_mapjoin_17.q.out b/ql/src/test/results/clientpositive/smb_mapjoin_17.q.out index d91dd1d..52c9fa6 100644 --- a/ql/src/test/results/clientpositive/smb_mapjoin_17.q.out +++ b/ql/src/test/results/clientpositive/smb_mapjoin_17.q.out @@ -218,14 +218,13 @@ STAGE PLANS: 4 key (type: int) 5 key (type: int) 6 key (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -337,14 +336,13 @@ STAGE PLANS: 4 key (type: int) 5 key (type: int) 6 key (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -457,14 +455,13 @@ STAGE PLANS: 5 key (type: int) 6 key (type: int) 7 key (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash 
- outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -640,7 +637,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) value expressions: _col1 (type: string) TableScan - alias: t + alias: q Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) @@ -648,7 +645,7 @@ STAGE PLANS: Map-reduce partition columns: key (type: int) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE TableScan - alias: s + alias: r Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) @@ -656,7 +653,7 @@ STAGE PLANS: Map-reduce partition columns: key (type: int) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE TableScan - alias: r + alias: s Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) @@ -664,7 +661,7 @@ STAGE PLANS: Map-reduce partition columns: key (type: int) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE TableScan - alias: q + alias: t Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) diff --git a/ql/src/test/results/clientpositive/smb_mapjoin_25.q.out b/ql/src/test/results/clientpositive/smb_mapjoin_25.q.out index 937b284..cb7d65c 100644 --- a/ql/src/test/results/clientpositive/smb_mapjoin_25.q.out +++ b/ql/src/test/results/clientpositive/smb_mapjoin_25.q.out @@ -63,25 +63,25 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 51 Data size: 206 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 52 Data size: 208 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key = 5) (type: boolean) - Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: 5 (type: int) sort order: + - Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: NONE TableScan - alias: a - Statistics: Num rows: 52 Data size: 208 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 51 Data size: 206 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key = 5) (type: boolean) - Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: 5 (type: int) sort order: + - Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -144,25 +144,25 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: d - Statistics: Num rows: 55 Data size: 222 Basic stats: COMPLETE Column stats: NONE + alias: c + Statistics: Num rows: 51 Data size: 206 Basic stats: 
COMPLETE Column stats: NONE Filter Operator predicate: (key = 5) (type: boolean) - Statistics: Num rows: 27 Data size: 108 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: 5 (type: int) sort order: + - Statistics: Num rows: 27 Data size: 108 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: NONE TableScan - alias: c - Statistics: Num rows: 51 Data size: 206 Basic stats: COMPLETE Column stats: NONE + alias: d + Statistics: Num rows: 55 Data size: 222 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key = 5) (type: boolean) - Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 27 Data size: 108 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: 5 (type: int) sort order: + - Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 27 Data size: 108 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: diff --git a/ql/src/test/results/clientpositive/smb_mapjoin_6.q.out b/ql/src/test/results/clientpositive/smb_mapjoin_6.q.out index ba6b107..1eb7afb 100644 --- a/ql/src/test/results/clientpositive/smb_mapjoin_6.q.out +++ b/ql/src/test/results/clientpositive/smb_mapjoin_6.q.out @@ -64,13 +64,13 @@ select /*+mapjoin(a)*/ * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b. POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 + Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 Stage-5 - Stage-6 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-3 depends on stages: Stage-0 + Stage-4 + Stage-6 + Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-1 @@ -103,10 +103,10 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.smb_join_results - Stage: Stage-7 + Stage: Stage-8 Conditional Operator - Stage: Stage-4 + Stage: Stage-5 Move Operator files: hdfs directory: true @@ -122,10 +122,10 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.smb_join_results - Stage: Stage-2 + Stage: Stage-3 Stats-Aggr Operator - Stage: Stage-3 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -137,7 +137,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.smb_join_results - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -149,7 +149,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.smb_join_results - Stage: Stage-6 + Stage: Stage-7 Move Operator files: hdfs directory: true @@ -1249,13 +1249,13 @@ select /*+mapjoin(b)*/ * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b. 
POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-1 @@ -1288,10 +1288,10 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.smb_join_results - Stage: Stage-8 + Stage: Stage-7 Conditional Operator - Stage: Stage-5 + Stage: Stage-4 Move Operator files: hdfs directory: true @@ -1307,10 +1307,10 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.smb_join_results - Stage: Stage-3 + Stage: Stage-2 Stats-Aggr Operator - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -1322,7 +1322,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.smb_join_results - Stage: Stage-6 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -1334,7 +1334,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.smb_join_results - Stage: Stage-7 + Stage: Stage-6 Move Operator files: hdfs directory: true @@ -2450,13 +2450,13 @@ select /*+mapjoin(a)*/ * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b. POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 + Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 Stage-5 - Stage-6 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-3 depends on stages: Stage-0 + Stage-4 + Stage-6 + Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-1 @@ -2489,10 +2489,10 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.smb_join_results - Stage: Stage-7 + Stage: Stage-8 Conditional Operator - Stage: Stage-4 + Stage: Stage-5 Move Operator files: hdfs directory: true @@ -2508,10 +2508,10 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.smb_join_results - Stage: Stage-2 + Stage: Stage-3 Stats-Aggr Operator - Stage: Stage-3 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -2523,7 +2523,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.smb_join_results - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -2535,7 +2535,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.smb_join_results - Stage: Stage-6 + Stage: Stage-7 Move Operator files: hdfs directory: true @@ -2567,13 +2567,13 @@ select /*+mapjoin(b)*/ * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b. 
POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-1 @@ -2606,10 +2606,10 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.smb_join_results - Stage: Stage-8 + Stage: Stage-7 Conditional Operator - Stage: Stage-5 + Stage: Stage-4 Move Operator files: hdfs directory: true @@ -2625,10 +2625,10 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.smb_join_results - Stage: Stage-3 + Stage: Stage-2 Stats-Aggr Operator - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -2640,7 +2640,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.smb_join_results - Stage: Stage-6 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -2652,7 +2652,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.smb_join_results - Stage: Stage-7 + Stage: Stage-6 Move Operator files: hdfs directory: true diff --git a/ql/src/test/results/clientpositive/smb_mapjoin_7.q.out b/ql/src/test/results/clientpositive/smb_mapjoin_7.q.out index c4dda71..aaf43bc 100644 --- a/ql/src/test/results/clientpositive/smb_mapjoin_7.q.out +++ b/ql/src/test/results/clientpositive/smb_mapjoin_7.q.out @@ -616,13 +616,13 @@ select /*+mapjoin(a)*/ * from smb_bucket4_1 a full outer join smb_bucket4_2 b on POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 + Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 Stage-5 - Stage-6 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-3 depends on stages: Stage-0 + Stage-4 + Stage-6 + Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-1 @@ -652,10 +652,10 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.smb_join_results - Stage: Stage-7 + Stage: Stage-8 Conditional Operator - Stage: Stage-4 + Stage: Stage-5 Move Operator files: hdfs directory: true @@ -671,10 +671,10 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.smb_join_results - Stage: Stage-2 + Stage: Stage-3 Stats-Aggr Operator - Stage: Stage-3 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -686,7 +686,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.smb_join_results - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -698,7 +698,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.smb_join_results - Stage: Stage-6 + Stage: Stage-7 Move Operator files: hdfs directory: true diff --git a/ql/src/test/results/clientpositive/sort_merge_join_desc_1.q.out b/ql/src/test/results/clientpositive/sort_merge_join_desc_1.q.out index 55ee521..29fab1f 100644 --- a/ql/src/test/results/clientpositive/sort_merge_join_desc_1.q.out +++ 
b/ql/src/test/results/clientpositive/sort_merge_join_desc_1.q.out @@ -77,14 +77,13 @@ STAGE PLANS: keys: 0 key (type: string) 1 key (type: string) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/sort_merge_join_desc_2.q.out b/ql/src/test/results/clientpositive/sort_merge_join_desc_2.q.out index 0000c23..9e58333 100644 --- a/ql/src/test/results/clientpositive/sort_merge_join_desc_2.q.out +++ b/ql/src/test/results/clientpositive/sort_merge_join_desc_2.q.out @@ -85,14 +85,13 @@ STAGE PLANS: keys: 0 key (type: string), value (type: string) 1 key (type: string), value (type: string) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/sort_merge_join_desc_3.q.out b/ql/src/test/results/clientpositive/sort_merge_join_desc_3.q.out index 5631477..cac9591 100644 --- a/ql/src/test/results/clientpositive/sort_merge_join_desc_3.q.out +++ b/ql/src/test/results/clientpositive/sort_merge_join_desc_3.q.out @@ -85,14 +85,13 @@ STAGE PLANS: keys: 0 key (type: string), value (type: string) 1 key (type: string), value (type: string) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/sort_merge_join_desc_4.q.out b/ql/src/test/results/clientpositive/sort_merge_join_desc_4.q.out index a0fbc2a..0e40242 100644 --- a/ql/src/test/results/clientpositive/sort_merge_join_desc_4.q.out +++ b/ql/src/test/results/clientpositive/sort_merge_join_desc_4.q.out @@ -107,17 +107,15 @@ STAGE PLANS: 0 key (type: string), value (type: string) 1 key (type: string), value (type: string) Statistics: Num rows: 45 Data size: 478 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 45 Data size: 478 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: diff --git 
a/ql/src/test/results/clientpositive/sort_merge_join_desc_5.q.out b/ql/src/test/results/clientpositive/sort_merge_join_desc_5.q.out index dc76dff..3abe28e 100644 --- a/ql/src/test/results/clientpositive/sort_merge_join_desc_5.q.out +++ b/ql/src/test/results/clientpositive/sort_merge_join_desc_5.q.out @@ -137,16 +137,15 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) Position of Big Table: 0 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: diff --git a/ql/src/test/results/clientpositive/sort_merge_join_desc_6.q.out b/ql/src/test/results/clientpositive/sort_merge_join_desc_6.q.out index b41cf22..25b88f3 100644 --- a/ql/src/test/results/clientpositive/sort_merge_join_desc_6.q.out +++ b/ql/src/test/results/clientpositive/sort_merge_join_desc_6.q.out @@ -218,19 +218,17 @@ STAGE PLANS: Position of Big Table: 0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true - Select Operator - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: diff --git a/ql/src/test/results/clientpositive/sort_merge_join_desc_7.q.out b/ql/src/test/results/clientpositive/sort_merge_join_desc_7.q.out index 1354e2d..c7cbacb 100644 --- a/ql/src/test/results/clientpositive/sort_merge_join_desc_7.q.out +++ b/ql/src/test/results/clientpositive/sort_merge_join_desc_7.q.out @@ -291,19 +291,17 @@ STAGE PLANS: 1 key (type: int) Position of Big Table: 0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: diff --git a/ql/src/test/results/clientpositive/sort_merge_join_desc_8.q.out 
b/ql/src/test/results/clientpositive/sort_merge_join_desc_8.q.out index 3f23f5a..48e6af8 100644 --- a/ql/src/test/results/clientpositive/sort_merge_join_desc_8.q.out +++ b/ql/src/test/results/clientpositive/sort_merge_join_desc_8.q.out @@ -130,14 +130,13 @@ STAGE PLANS: keys: 0 key (type: string) 1 key (type: string) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -231,17 +230,15 @@ STAGE PLANS: 0 key (type: string), value2 (type: string) 1 key (type: string), value2 (type: string) Statistics: Num rows: 45 Data size: 920 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 45 Data size: 920 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out b/ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out index 873390d..ce8e751 100644 --- a/ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out +++ b/ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out @@ -164,7 +164,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: d @@ -186,7 +186,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: e @@ -205,7 +205,7 @@ STAGE PLANS: 1 deptid (type: int) outputColumnNames: _col0, _col1, _col2, _col6, _col7 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 41 Data size: 7954 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col6 (type: int), _col7 (type: string) @@ -245,7 +245,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: dept @@ -267,7 +267,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: emp @@ -286,7 +286,7 @@ STAGE PLANS: 1 deptid (type: int), deptname (type: string) outputColumnNames: _col0, _col1, _col2, _col6, _col7 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((_col1 = _col6) and (_col0 = _col7)) (type: boolean) @@ -325,7 +325,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: d @@ -347,7 +347,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: e @@ -366,7 +366,7 @@ STAGE 
PLANS: 1 deptid (type: int), deptname (type: string) outputColumnNames: _col0, _col1, _col2, _col6, _col7 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col6 (type: int), _col7 (type: string) @@ -406,7 +406,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: dept @@ -428,7 +428,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: emp @@ -447,7 +447,7 @@ STAGE PLANS: 1 deptid (type: int), deptname (type: string), deptname (type: string) outputColumnNames: _col0, _col1, _col2, _col6, _col7 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 11 Data size: 2134 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (((_col1 = _col6) and (_col0 = _col7)) and (_col7 = _col0)) (type: boolean) @@ -490,7 +490,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: d @@ -509,7 +509,7 @@ STAGE PLANS: 2 deptid (type: int) Local Work: Map Reduce Local Work - Map 2 + Map 3 Map Operator Tree: TableScan alias: e1 @@ -533,7 +533,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: e @@ -555,8 +555,8 @@ STAGE PLANS: 2 deptid (type: int) outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col11, _col12, _col13 input vertices: - 1 Map 1 - 2 Map 2 + 1 Map 2 + 2 Map 3 Statistics: Num rows: 658 Data size: 192794 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col6 (type: int), _col7 (type: string), _col11 (type: string), _col12 (type: int), _col13 (type: int) @@ -594,7 +594,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: d @@ -637,7 +637,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: e @@ -659,7 +659,7 @@ STAGE PLANS: 2 locid (type: int) outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col11, _col12, _col13, _col14 input vertices: - 1 Map 1 + 1 Map 2 2 Map 3 Statistics: Num rows: 47 Data size: 13912 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -700,7 +700,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: d @@ -743,7 +743,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: e @@ -765,7 +765,7 @@ STAGE PLANS: 2 locid (type: int), state (type: string) outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col11, _col12, _col13, _col14 input vertices: - 1 Map 1 + 1 Map 2 2 Map 3 Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: COMPLETE Select Operator diff --git a/ql/src/test/results/clientpositive/spark/auto_join0.q.out b/ql/src/test/results/clientpositive/spark/auto_join0.q.out index bade040..4f9d44d 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join0.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join0.q.out @@ -84,14 +84,10 @@ STAGE PLANS: input vertices: 1 Map 4 Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: 
string), _col2 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + sort order: ++++ Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - sort order: ++++ - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work Reducer 2 diff --git a/ql/src/test/results/clientpositive/spark/auto_join1.q.out b/ql/src/test/results/clientpositive/spark/auto_join1.q.out index ef02be4..3e76906 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join1.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join1.q.out @@ -25,7 +25,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: src2 @@ -47,7 +47,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src1 @@ -66,7 +66,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col6 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), _col6 (type: string) diff --git a/ql/src/test/results/clientpositive/spark/auto_join10.q.out b/ql/src/test/results/clientpositive/spark/auto_join10.q.out index b465939..56f53c4 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join10.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join10.q.out @@ -77,19 +77,15 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col3 (type: string) - outputColumnNames: _col2, _col3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reducer 2 diff --git a/ql/src/test/results/clientpositive/spark/auto_join11.q.out b/ql/src/test/results/clientpositive/spark/auto_join11.q.out index d2db28d..c35de88 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join11.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join11.q.out @@ -24,7 +24,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: src @@ -49,10 +49,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src @@ -75,24 +75,20 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col3 input vertices: - 1 Map 1 + 1 Map 3 Statistics: Num rows: 182 Data 
size: 1939 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col3 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0,_col3)) - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: sum(hash(_col0,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work - Reducer 3 + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/spark/auto_join12.q.out b/ql/src/test/results/clientpositive/spark/auto_join12.q.out index b040cc5..a62e54d 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join12.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join12.q.out @@ -30,7 +30,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: src @@ -80,10 +80,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src @@ -109,25 +109,21 @@ STAGE PLANS: 2 _col0 (type: string) outputColumnNames: _col0, _col3 input vertices: - 1 Map 1 + 1 Map 3 2 Map 4 Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col3 - Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0,_col3)) - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: sum(hash(_col0,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work - Reducer 3 + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/spark/auto_join13.q.out b/ql/src/test/results/clientpositive/spark/auto_join13.q.out index cc78fe2..7175309 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join13.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join13.q.out @@ -30,7 +30,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: src @@ -76,10 +76,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src @@ -102,7 +102,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col2, _col3 input vertices: - 1 Map 1 + 
1 Map 3 Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col0 + _col2) is not null (type: boolean) @@ -120,22 +120,18 @@ STAGE PLANS: input vertices: 1 Map 4 Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col3 - Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0,_col3)) - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: sum(hash(_col0,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work - Reducer 3 + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/spark/auto_join14.q.out b/ql/src/test/results/clientpositive/spark/auto_join14.q.out index 5478beb..cbca649 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join14.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join14.q.out @@ -1,10 +1,10 @@ -PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@dest1 -POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE @@ -29,7 +29,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src @@ -51,7 +51,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: srcpart @@ -70,7 +70,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col6 input vertices: - 0 Map 2 + 0 Map 1 Statistics: Num rows: 183 Data size: 1951 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), _col6 (type: string) diff --git a/ql/src/test/results/clientpositive/spark/auto_join15.q.out b/ql/src/test/results/clientpositive/spark/auto_join15.q.out index 85c1f95..d37ac5b 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join15.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join15.q.out @@ -24,7 +24,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: src2 @@ -45,11 +45,11 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (SORT, 1) - Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 2 <- Map 1 (SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src1 @@ -68,7 +68,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 1 Map 1 + 1 Map 4 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator 
expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) @@ -80,7 +80,7 @@ STAGE PLANS: Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Reducer 3 + Reducer 2 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string) @@ -95,7 +95,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 4 + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/spark/auto_join16.q.out b/ql/src/test/results/clientpositive/spark/auto_join16.q.out index f6f422b..5aba324 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join16.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join16.q.out @@ -73,19 +73,15 @@ STAGE PLANS: input vertices: 0 Map 1 Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col3 - Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0,_col3)) - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: sum(hash(_col0,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reducer 3 diff --git a/ql/src/test/results/clientpositive/spark/auto_join17.q.out b/ql/src/test/results/clientpositive/spark/auto_join17.q.out index 0ac58f8..f18d832 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join17.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join17.q.out @@ -25,7 +25,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: src2 @@ -47,7 +47,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src1 @@ -66,7 +66,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col5) (type: int), _col6 (type: string) diff --git a/ql/src/test/results/clientpositive/spark/auto_join18.q.out b/ql/src/test/results/clientpositive/spark/auto_join18.q.out index 4efb0a5..ec5cc28 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join18.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join18.q.out @@ -32,8 +32,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 6 <- Map 5 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) + Reducer 6 <- Map 5 (GROUP PARTITION-LEVEL SORT, 1) Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Reducer 3 (GROUP, 1) 
#### A masked pattern was here #### @@ -41,26 +41,6 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: src2 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(DISTINCT value) - keys: key (type: string), value (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Map 5 - Map Operator Tree: - TableScan alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -79,24 +59,40 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) + Map 5 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT value) + keys: key (type: string), value (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Group By Operator - aggregations: count(DISTINCT KEY._col1:0._col0) + aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reducer 3 Reduce Operator Tree: Join Operator @@ -106,20 +102,16 @@ STAGE PLANS: 0 {KEY.reducesinkkey0} {VALUE._col0} 1 {KEY.reducesinkkey0} {VALUE._col0} outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint) - 
outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0,_col1,_col2,_col3)) - mode: hash - outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col0,_col1,_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Reducer 4 Reduce Operator Tree: Group By Operator @@ -141,21 +133,17 @@ STAGE PLANS: Reducer 6 Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) + aggregations: count(DISTINCT KEY._col1:0._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/auto_join18_multi_distinct.q.out b/ql/src/test/results/clientpositive/spark/auto_join18_multi_distinct.q.out index 6711da5..7e93e19 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join18_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join18_multi_distinct.q.out @@ -34,8 +34,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 6 <- Map 5 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) + Reducer 6 <- Map 5 (GROUP PARTITION-LEVEL SORT, 1) Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### @@ -43,26 +43,6 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: src2 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(DISTINCT value), count(DISTINCT key) - keys: key (type: string), value (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: 
_col0 (type: string) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Map 5 - Map Operator Tree: - TableScan alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -81,24 +61,40 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) + Map 5 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT value), count(DISTINCT key) + keys: key (type: string), value (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Group By Operator - aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0) + aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint), _col2 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reducer 3 Reduce Operator Tree: Join Operator @@ -108,20 +104,16 @@ STAGE PLANS: 0 {KEY.reducesinkkey0} {VALUE._col0} 1 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint), _col4 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0,_col1,_col2,_col3,_col4)) - mode: hash - outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col0,_col1,_col2,_col3,_col4)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: 
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Reducer 4 Reduce Operator Tree: Group By Operator @@ -143,21 +135,17 @@ STAGE PLANS: Reducer 6 Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) + aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/auto_join19.q.out b/ql/src/test/results/clientpositive/spark/auto_join19.q.out index 8cb2969..c121aaa 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join19.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join19.q.out @@ -27,7 +27,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: src2 @@ -49,7 +49,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src1 @@ -68,7 +68,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col8 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), _col8 (type: string) diff --git a/ql/src/test/results/clientpositive/spark/auto_join2.q.out b/ql/src/test/results/clientpositive/spark/auto_join2.q.out index 86a9b9e..fe71679 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join2.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join2.q.out @@ -26,7 +26,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: src2 @@ -48,7 +48,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: src1 @@ -67,7 +67,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col5 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col0 + _col5) is not null (type: boolean) @@ -86,7 +86,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: src3 @@ -105,7 +105,7 @@ STAGE PLANS: 1 UDFToDouble(key) (type: double) outputColumnNames: _col0, _col11 input 
vertices: - 0 Map 3 + 0 Map 1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), _col11 (type: string) diff --git a/ql/src/test/results/clientpositive/spark/auto_join20.q.out b/ql/src/test/results/clientpositive/spark/auto_join20.q.out index 31ee1bc..f00c855 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join20.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join20.q.out @@ -24,18 +24,18 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 4 + Map 1 Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key < 10) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {key} {value} - 1 {value} + 0 {value} + 1 {key} {value} 2 {key} {value} filter predicates: 0 @@ -47,18 +47,18 @@ STAGE PLANS: 2 key (type: string) Local Work: Map Reduce Local Work - Map 5 + Map 2 Map Operator Tree: TableScan - alias: src1 + alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key < 10) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {value} - 1 {key} {value} + 0 {key} {value} + 1 {value} 2 {key} {value} filter predicates: 0 @@ -74,11 +74,11 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 3 (SORT, 1) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: src3 @@ -101,8 +101,8 @@ STAGE PLANS: 2 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: - 0 Map 5 - 1 Map 4 + 0 Map 1 + 1 Map 2 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) @@ -114,7 +114,7 @@ STAGE PLANS: Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Reducer 2 + Reducer 4 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) @@ -129,7 +129,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 3 + Reducer 5 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -199,18 +199,18 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 4 + Map 1 Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key < 10) and (key < 15)) (type: boolean) + predicate: ((key < 15) and (key < 10)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {key} {value} - 1 {value} + 0 {value} + 1 {key} {value} 2 {key} {value} filter 
predicates: 0 @@ -222,18 +222,18 @@ STAGE PLANS: 2 key (type: string) Local Work: Map Reduce Local Work - Map 5 + Map 2 Map Operator Tree: TableScan - alias: src1 + alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key < 15) and (key < 10)) (type: boolean) + predicate: ((key < 10) and (key < 15)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {value} - 1 {key} {value} + 0 {key} {value} + 1 {value} 2 {key} {value} filter predicates: 0 @@ -249,11 +249,11 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (SORT, 3) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 3 (SORT, 3) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: src3 @@ -276,8 +276,8 @@ STAGE PLANS: 2 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: - 0 Map 5 - 1 Map 4 + 0 Map 1 + 1 Map 2 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) @@ -289,7 +289,7 @@ STAGE PLANS: Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Reducer 2 + Reducer 4 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) @@ -304,7 +304,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 3 + Reducer 5 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/spark/auto_join21.q.out b/ql/src/test/results/clientpositive/spark/auto_join21.q.out index 26174be..ca5c455 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join21.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join21.q.out @@ -17,6 +17,26 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + 2 {key} {value} + filter predicates: + 0 {(key < 10)} + 1 + 2 {(key < 10)} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work + Map 2 + Map Operator Tree: + TableScan alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -37,34 +57,14 @@ STAGE PLANS: 2 key (type: string) Local Work: Map Reduce Local Work - Map 4 - Map Operator Tree: - TableScan - alias: src1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {key} {value} - 2 {key} {value} - filter predicates: - 0 {(key < 10)} - 1 - 2 {(key < 10)} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - Local Work: - Map Reduce Local Work Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (SORT, 1) + Reducer 4 <- Map 3 (SORT, 1) #### A masked pattern was here 
#### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: src3 @@ -87,8 +87,8 @@ STAGE PLANS: 2 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: - 0 Map 4 - 1 Map 1 + 0 Map 1 + 1 Map 2 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) @@ -100,7 +100,7 @@ STAGE PLANS: Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Reducer 3 + Reducer 4 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) diff --git a/ql/src/test/results/clientpositive/spark/auto_join22.q.out b/ql/src/test/results/clientpositive/spark/auto_join22.q.out index 9e4bccb..a12535e 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join22.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join22.q.out @@ -14,7 +14,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: src2 @@ -52,10 +52,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src1 @@ -74,7 +74,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col1 input vertices: - 1 Map 1 + 1 Map 3 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string) @@ -108,7 +108,7 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work - Reducer 3 + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/spark/auto_join23.q.out b/ql/src/test/results/clientpositive/spark/auto_join23.q.out index b57e426..ad63d78 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join23.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join23.q.out @@ -14,7 +14,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: src2 @@ -35,10 +35,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (SORT, 1) + Reducer 2 <- Map 1 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src1 @@ -57,7 +57,7 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 1 Map 1 + 1 Map 3 Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) @@ -69,7 +69,7 @@ STAGE PLANS: Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Reducer 3 + Reducer 2 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string) diff --git a/ql/src/test/results/clientpositive/spark/auto_join24.q.out b/ql/src/test/results/clientpositive/spark/auto_join24.q.out index c725c96..88c6890 
100644 --- a/ql/src/test/results/clientpositive/spark/auto_join24.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join24.q.out @@ -34,7 +34,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: b @@ -55,10 +55,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -77,24 +77,20 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col1 input vertices: - 1 Map 1 + 1 Map 3 Statistics: Num rows: 170 Data size: 817 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 170 Data size: 817 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col1) - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: sum(_col1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work - Reducer 3 + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/spark/auto_join26.q.out b/ql/src/test/results/clientpositive/spark/auto_join26.q.out index 62d84b8..f6b3319 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join26.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join26.q.out @@ -29,7 +29,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: x @@ -50,10 +50,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: y @@ -72,27 +72,23 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0 input vertices: - 0 Map 3 + 0 Map 1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Local Work: Map Reduce Local Work - Reducer 2 + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/spark/auto_join27.q.out 
b/ql/src/test/results/clientpositive/spark/auto_join27.q.out index 38ea679..fb48351 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join27.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join27.q.out @@ -30,38 +30,16 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 4 <- Map 7 (PARTITION-LEVEL SORT, 1), Union 3 (PARTITION-LEVEL SORT, 1) - Union 3 <- Map 6 (NONE, 0), Reducer 2 (NONE, 0) - Reducer 5 <- Reducer 4 (GROUP, 1) + Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 1), Union 2 (PARTITION-LEVEL SORT, 1) + Reducer 6 <- Map 5 (GROUP, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) + Union 2 <- Map 1 (NONE, 0), Reducer 6 (NONE, 0) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 200) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: key (type: string), value (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Map 6 - Map Operator Tree: - TableScan - alias: src Filter Operator predicate: (key < 200) (type: boolean) Select Operator @@ -71,6 +49,24 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 200) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: string), value (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Map 7 Map Operator Tree: TableScan @@ -88,20 +84,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reducer 4 + Reducer 3 Reduce Operator Tree: Join Operator condition map: @@ -110,18 +93,16 @@ STAGE PLANS: 0 1 Statistics: Num rows: 273 Data size: 2908 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 273 Data size: 2908 Basic stats: COMPLETE Column 
stats: NONE - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 5 + value expressions: _col0 (type: bigint) + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -139,8 +120,21 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 3 - Vertex: Union 3 + Reducer 6 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Union 2 + Vertex: Union 2 Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/auto_join28.q.out b/ql/src/test/results/clientpositive/spark/auto_join28.q.out index 16681e9..139c295 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join28.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join28.q.out @@ -17,6 +17,26 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + 2 {key} {value} + filter predicates: + 0 {(key < 10)} + 1 + 2 {(key < 10)} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work + Map 2 + Map Operator Tree: + TableScan alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -37,34 +57,14 @@ STAGE PLANS: 2 key (type: string) Local Work: Map Reduce Local Work - Map 4 - Map Operator Tree: - TableScan - alias: src1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {key} {value} - 2 {key} {value} - filter predicates: - 0 {(key < 10)} - 1 - 2 {(key < 10)} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - Local Work: - Map Reduce Local Work Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (SORT, 1) + Reducer 4 <- Map 3 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: src3 @@ -87,8 +87,8 @@ STAGE PLANS: 2 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: - 0 Map 4 - 1 Map 1 + 0 Map 1 + 1 Map 2 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) @@ -100,7 +100,7 @@ STAGE PLANS: Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Reducer 3 + Reducer 4 
Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) @@ -136,7 +136,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: src2 @@ -159,7 +159,7 @@ STAGE PLANS: 2 key (type: string) Local Work: Map Reduce Local Work - Map 2 + Map 4 Map Operator Tree: TableScan alias: src3 @@ -186,10 +186,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 4 <- Map 3 (SORT, 1) + Reducer 2 <- Map 1 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: src1 @@ -212,8 +212,8 @@ STAGE PLANS: 2 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: - 1 Map 1 - 2 Map 2 + 1 Map 3 + 2 Map 4 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) @@ -225,7 +225,7 @@ STAGE PLANS: Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Reducer 4 + Reducer 2 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) @@ -261,19 +261,19 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: src3 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key < 10) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {key} {value} + 0 {value} 1 {key} {value} - 2 {value} + 2 {key} {value} filter predicates: 0 1 {(key > 10)} @@ -287,16 +287,16 @@ STAGE PLANS: Map 4 Map Operator Tree: TableScan - alias: src1 + alias: src3 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key < 10) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {value} + 0 {key} {value} 1 {key} {value} - 2 {key} {value} + 2 {value} filter predicates: 0 1 {(key > 10)} @@ -311,10 +311,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (SORT, 1) + Reducer 3 <- Map 2 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: src2 @@ -337,8 +337,8 @@ STAGE PLANS: 2 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: - 0 Map 4 - 2 Map 3 + 0 Map 1 + 2 Map 4 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) @@ -350,7 +350,7 @@ STAGE PLANS: Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Reducer 2 + Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 
(type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) @@ -389,26 +389,6 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: src2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - 2 {key} {value} - filter predicates: - 0 - 1 {(key > 10)} - 2 {(key < 10)} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - Local Work: - Map Reduce Local Work - Map 4 - Map Operator Tree: - TableScan alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -429,14 +409,34 @@ STAGE PLANS: 2 key (type: string) Local Work: Map Reduce Local Work + Map 2 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + 2 {key} {value} + filter predicates: + 0 + 1 {(key > 10)} + 2 {(key < 10)} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (SORT, 1) + Reducer 4 <- Map 3 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: src3 @@ -459,8 +459,8 @@ STAGE PLANS: 2 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: - 0 Map 4 - 1 Map 1 + 0 Map 1 + 1 Map 2 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) @@ -472,7 +472,7 @@ STAGE PLANS: Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Reducer 3 + Reducer 4 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) diff --git a/ql/src/test/results/clientpositive/spark/auto_join29.q.out b/ql/src/test/results/clientpositive/spark/auto_join29.q.out index e618ce0..51ed229 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join29.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join29.q.out @@ -17,6 +17,26 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + 2 {key} {value} + filter predicates: + 0 {(key < 10)} + 1 + 2 {(key < 10)} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work + Map 2 + Map Operator Tree: + TableScan alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -37,34 +57,14 @@ STAGE PLANS: 2 key (type: string) Local Work: Map Reduce Local Work - Map 4 - Map Operator Tree: - TableScan - alias: src1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {key} {value} - 2 
{key} {value} - filter predicates: - 0 {(key < 10)} - 1 - 2 {(key < 10)} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - Local Work: - Map Reduce Local Work Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (SORT, 1) + Reducer 4 <- Map 3 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: src3 @@ -87,8 +87,8 @@ STAGE PLANS: 2 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: - 0 Map 4 - 1 Map 1 + 0 Map 1 + 1 Map 2 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) @@ -100,7 +100,7 @@ STAGE PLANS: Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Reducer 3 + Reducer 4 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) @@ -644,7 +644,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: src2 @@ -667,7 +667,7 @@ STAGE PLANS: 2 key (type: string) Local Work: Map Reduce Local Work - Map 2 + Map 4 Map Operator Tree: TableScan alias: src3 @@ -694,10 +694,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 4 <- Map 3 (SORT, 3) + Reducer 2 <- Map 1 (SORT, 3) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: src1 @@ -720,8 +720,8 @@ STAGE PLANS: 2 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: - 1 Map 1 - 2 Map 2 + 1 Map 3 + 2 Map 4 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) @@ -733,7 +733,7 @@ STAGE PLANS: Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Reducer 4 + Reducer 2 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) @@ -1277,19 +1277,19 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: src3 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key < 10) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {key} {value} + 0 {value} 1 {key} {value} - 2 {value} + 2 {key} {value} filter predicates: 0 1 {(key > 10)} @@ -1303,16 +1303,16 @@ STAGE PLANS: Map 4 Map Operator Tree: TableScan - alias: src1 + alias: src3 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key < 10) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {value} + 0 {key} {value} 
1 {key} {value} - 2 {key} {value} + 2 {value} filter predicates: 0 1 {(key > 10)} @@ -1327,10 +1327,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (SORT, 3) + Reducer 3 <- Map 2 (SORT, 3) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: src2 @@ -1353,8 +1353,8 @@ STAGE PLANS: 2 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: - 0 Map 4 - 2 Map 3 + 0 Map 1 + 2 Map 4 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) @@ -1366,7 +1366,7 @@ STAGE PLANS: Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Reducer 2 + Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) @@ -1925,26 +1925,6 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: src2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - 2 {key} {value} - filter predicates: - 0 - 1 {(key > 10)} - 2 {(key < 10)} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - Local Work: - Map Reduce Local Work - Map 4 - Map Operator Tree: - TableScan alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -1965,14 +1945,34 @@ STAGE PLANS: 2 key (type: string) Local Work: Map Reduce Local Work + Map 2 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + 2 {key} {value} + filter predicates: + 0 + 1 {(key > 10)} + 2 {(key < 10)} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (SORT, 3) + Reducer 4 <- Map 3 (SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: src3 @@ -1995,8 +1995,8 @@ STAGE PLANS: 2 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: - 0 Map 4 - 1 Map 1 + 0 Map 1 + 1 Map 2 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) @@ -2008,7 +2008,7 @@ STAGE PLANS: Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Reducer 3 + Reducer 4 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) @@ -2564,7 +2564,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: src2 @@ -2583,7 +2583,7 @@ STAGE PLANS: 2 key (type: string) Local Work: Map 
Reduce Local Work - Map 2 + Map 4 Map Operator Tree: TableScan alias: src3 @@ -2606,10 +2606,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 4 <- Map 3 (SORT, 3) + Reducer 2 <- Map 1 (SORT, 3) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: src1 @@ -2631,8 +2631,8 @@ STAGE PLANS: 2 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: - 1 Map 1 - 2 Map 2 + 1 Map 3 + 2 Map 4 Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) @@ -2644,7 +2644,7 @@ STAGE PLANS: Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Reducer 4 + Reducer 2 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) @@ -2691,15 +2691,15 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key < 10) and (key > 10)) (type: boolean) + predicate: ((key > 10) and (key < 10)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {key} {value} - 1 {value} + 0 {value} + 1 {key} {value} 2 {key} {value} filter predicates: 0 @@ -2711,18 +2711,18 @@ STAGE PLANS: 2 key (type: string) Local Work: Map Reduce Local Work - Map 4 + Map 2 Map Operator Tree: TableScan - alias: src1 + alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > 10) and (key < 10)) (type: boolean) + predicate: ((key < 10) and (key > 10)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {value} - 1 {key} {value} + 0 {key} {value} + 1 {value} 2 {key} {value} filter predicates: 0 @@ -2738,10 +2738,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (SORT, 3) + Reducer 4 <- Map 3 (SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: src3 @@ -2764,8 +2764,8 @@ STAGE PLANS: 2 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: - 0 Map 4 - 1 Map 1 + 0 Map 1 + 1 Map 2 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) @@ -2777,7 +2777,7 @@ STAGE PLANS: Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Reducer 3 + Reducer 4 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) @@ -3321,7 +3321,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: src2 @@ 
-3344,7 +3344,7 @@ STAGE PLANS: 2 key (type: string) Local Work: Map Reduce Local Work - Map 2 + Map 4 Map Operator Tree: TableScan alias: src3 @@ -3371,10 +3371,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 4 <- Map 3 (SORT, 3) + Reducer 2 <- Map 1 (SORT, 3) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: src1 @@ -3397,8 +3397,8 @@ STAGE PLANS: 2 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: - 1 Map 1 - 2 Map 2 + 1 Map 3 + 2 Map 4 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) @@ -3410,7 +3410,7 @@ STAGE PLANS: Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Reducer 4 + Reducer 2 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) @@ -3454,19 +3454,19 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: src3 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key < 10) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {key} {value} + 0 {value} 1 {key} {value} - 2 {value} + 2 {key} {value} filter predicates: 0 1 {(key > 10)} @@ -3480,16 +3480,16 @@ STAGE PLANS: Map 4 Map Operator Tree: TableScan - alias: src1 + alias: src3 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key < 10) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {value} + 0 {key} {value} 1 {key} {value} - 2 {key} {value} + 2 {value} filter predicates: 0 1 {(key > 10)} @@ -3504,10 +3504,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (SORT, 3) + Reducer 3 <- Map 2 (SORT, 3) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: src2 @@ -3533,8 +3533,8 @@ STAGE PLANS: 2 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: - 0 Map 4 - 2 Map 3 + 0 Map 1 + 2 Map 4 Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) @@ -3546,7 +3546,7 @@ STAGE PLANS: Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Reducer 2 + Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) @@ -3612,7 +3612,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: src2 @@ -3631,7 +3631,7 @@ STAGE PLANS: 2 key (type: string) 
Local Work: Map Reduce Local Work - Map 2 + Map 4 Map Operator Tree: TableScan alias: src3 @@ -3654,10 +3654,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 4 <- Map 3 (SORT, 3) + Reducer 2 <- Map 1 (SORT, 3) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: src1 @@ -3679,8 +3679,8 @@ STAGE PLANS: 2 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: - 1 Map 1 - 2 Map 2 + 1 Map 3 + 2 Map 4 Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) @@ -3692,7 +3692,7 @@ STAGE PLANS: Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Reducer 4 + Reducer 2 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) diff --git a/ql/src/test/results/clientpositive/spark/auto_join3.q.out b/ql/src/test/results/clientpositive/spark/auto_join3.q.out index 8b2442d..862b12e 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join3.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join3.q.out @@ -25,7 +25,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: src2 @@ -44,7 +44,7 @@ STAGE PLANS: 2 key (type: string) Local Work: Map Reduce Local Work - Map 2 + Map 3 Map Operator Tree: TableScan alias: src3 @@ -68,7 +68,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: src1 @@ -90,8 +90,8 @@ STAGE PLANS: 2 key (type: string) outputColumnNames: _col0, _col11 input vertices: - 1 Map 1 - 2 Map 2 + 1 Map 2 + 2 Map 3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), _col11 (type: string) diff --git a/ql/src/test/results/clientpositive/spark/auto_join30.q.out b/ql/src/test/results/clientpositive/spark/auto_join30.q.out index b401af5..632099a 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join30.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join30.q.out @@ -103,19 +103,15 @@ STAGE PLANS: input vertices: 1 Reducer 5 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col3 (type: string) - outputColumnNames: _col2, _col3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -259,19 +255,15 @@ STAGE PLANS: input vertices: 1 Reducer 5 Statistics: 
Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col3 (type: string) - outputColumnNames: _col2, _col3 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -415,19 +407,15 @@ STAGE PLANS: input vertices: 0 Reducer 2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col3 (type: string) - outputColumnNames: _col2, _col3 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Reducer 5 Reduce Operator Tree: Group By Operator @@ -503,11 +491,11 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (SORT, 3) + Reducer 5 <- Map 4 (SORT, 3) Reducer 7 <- Map 6 (SORT, 3) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: src @@ -541,18 +529,18 @@ STAGE PLANS: sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Reducer 2 + Reducer 5 Local Work: Map Reduce Local Work Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string) - outputColumnNames: _col0 + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: 0 - 1 {_col0} {_col1} + 1 {_col1} 2 keys: 0 _col0 (type: string) @@ -563,13 +551,13 @@ STAGE PLANS: Map Reduce Local Work Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 + expressions: VALUE._col0 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: 0 - 1 {_col1} + 1 {_col0} {_col1} 2 keys: 0 _col0 (type: string) @@ -579,11 +567,11 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 4 <- Map 3 (SORT, 3) - Reducer 5 <- Reducer 4 (GROUP, 1) + Reducer 2 <- Map 1 (SORT, 3) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map 
Operator Tree: TableScan alias: src @@ -599,7 +587,7 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reducer 4 + Reducer 2 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -621,23 +609,19 @@ STAGE PLANS: 2 _col0 (type: string) outputColumnNames: _col2, _col3 input vertices: - 1 Reducer 7 - 2 Reducer 2 + 1 Reducer 5 + 2 Reducer 7 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col3 (type: string) - outputColumnNames: _col2, _col3 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 5 + value expressions: _col0 (type: bigint) + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -718,11 +702,11 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (SORT, 3) + Reducer 5 <- Map 4 (SORT, 3) Reducer 7 <- Map 6 (SORT, 3) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: src @@ -750,18 +734,18 @@ STAGE PLANS: sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Reducer 2 + Reducer 5 Local Work: Map Reduce Local Work Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string) - outputColumnNames: _col0 + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: 0 - 1 {_col0} {_col1} + 1 {_col1} 2 keys: 0 _col0 (type: string) @@ -772,13 +756,13 @@ STAGE PLANS: Map Reduce Local Work Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 + expressions: VALUE._col0 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: 0 - 1 {_col1} + 1 {_col0} {_col1} 2 keys: 0 _col0 (type: string) @@ -788,11 +772,11 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 4 <- Map 3 (SORT, 3) - Reducer 5 <- Reducer 4 (GROUP, 1) + Reducer 2 <- Map 1 (SORT, 3) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: src @@ -805,7 +789,7 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reducer 4 + Reducer 2 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -827,23 +811,19 @@ STAGE PLANS: 2 _col0 (type: string) outputColumnNames: _col2, _col3 input vertices: - 1 Reducer 7 - 2 Reducer 2 + 1 Reducer 5 + 2 Reducer 7 Statistics: Num rows: 1100 Data size: 11686 
Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col3 (type: string) - outputColumnNames: _col2, _col3 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 5 + value expressions: _col0 (type: bigint) + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -924,11 +904,11 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (SORT, 3) + Reducer 5 <- Map 4 (SORT, 3) Reducer 7 <- Map 6 (SORT, 3) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: src @@ -956,18 +936,18 @@ STAGE PLANS: sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Reducer 2 + Reducer 5 Local Work: Map Reduce Local Work Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string) - outputColumnNames: _col0 + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: 0 - 1 {_col0} {_col1} + 1 {_col1} 2 keys: 0 _col0 (type: string) @@ -978,13 +958,13 @@ STAGE PLANS: Map Reduce Local Work Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 + expressions: VALUE._col0 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: 0 - 1 {_col1} + 1 {_col0} {_col1} 2 keys: 0 _col0 (type: string) @@ -994,11 +974,11 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 4 <- Map 3 (SORT, 3) - Reducer 5 <- Reducer 4 (GROUP, 1) + Reducer 2 <- Map 1 (SORT, 3) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: src @@ -1011,7 +991,7 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reducer 4 + Reducer 2 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -1033,23 +1013,19 @@ STAGE PLANS: 2 _col0 (type: string) outputColumnNames: _col2, _col3 input vertices: - 1 Reducer 7 - 2 Reducer 2 + 1 Reducer 5 + 2 Reducer 7 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col3 (type: string) - outputColumnNames: _col2, _col3 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic 
stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 5 + value expressions: _col0 (type: bigint) + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -1130,11 +1106,11 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 5 <- Map 4 (SORT, 3) - Reducer 7 <- Map 6 (SORT, 3) + Reducer 2 <- Map 1 (SORT, 3) + Reducer 4 <- Map 3 (SORT, 3) #### A masked pattern was here #### Vertices: - Map 4 + Map 1 Map Operator Tree: TableScan alias: src @@ -1147,7 +1123,7 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 6 + Map 3 Map Operator Tree: TableScan alias: src @@ -1161,7 +1137,7 @@ STAGE PLANS: sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Reducer 5 + Reducer 2 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -1178,7 +1154,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) 2 _col0 (type: string) - Reducer 7 + Reducer 4 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -1199,11 +1175,11 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (SORT, 3) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 6 <- Map 5 (SORT, 3) + Reducer 7 <- Reducer 6 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 5 Map Operator Tree: TableScan alias: src @@ -1217,7 +1193,7 @@ STAGE PLANS: sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Reducer 2 + Reducer 6 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -1239,23 +1215,19 @@ STAGE PLANS: 2 _col0 (type: string) outputColumnNames: _col2, _col3 input vertices: - 0 Reducer 5 - 1 Reducer 7 + 0 Reducer 2 + 1 Reducer 4 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col3 (type: string) - outputColumnNames: _col2, _col3 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 + value expressions: _col0 (type: bigint) + Reducer 7 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -1336,11 +1308,11 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 5 <- Map 4 (SORT, 3) - Reducer 7 <- Map 6 (SORT, 3) + Reducer 2 <- Map 1 (SORT, 3) + Reducer 4 <- Map 3 (SORT, 3) #### A masked pattern was here #### Vertices: - Map 4 + Map 1 Map Operator Tree: TableScan alias: src @@ -1353,7 +1325,7 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: 
NONE - Map 6 + Map 3 Map Operator Tree: TableScan alias: src @@ -1367,7 +1339,7 @@ STAGE PLANS: sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Reducer 5 + Reducer 2 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -1384,7 +1356,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) 2 _col0 (type: string) - Reducer 7 + Reducer 4 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -1405,11 +1377,11 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (SORT, 3) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 6 <- Map 5 (SORT, 3) + Reducer 7 <- Reducer 6 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 5 Map Operator Tree: TableScan alias: src @@ -1423,7 +1395,7 @@ STAGE PLANS: sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Reducer 2 + Reducer 6 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -1445,23 +1417,19 @@ STAGE PLANS: 2 _col0 (type: string) outputColumnNames: _col2, _col3 input vertices: - 0 Reducer 5 - 1 Reducer 7 + 0 Reducer 2 + 1 Reducer 4 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col3 (type: string) - outputColumnNames: _col2, _col3 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 + value expressions: _col0 (type: bigint) + Reducer 7 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/spark/auto_join31.q.out b/ql/src/test/results/clientpositive/spark/auto_join31.q.out index 3c90dd1..4070275 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join31.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join31.q.out @@ -30,7 +30,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (SORT, 1) - Reducer 4 <- Map 3 (SORT, 1) + Reducer 7 <- Map 6 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -39,33 +39,33 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Map 3 + Map 6 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: 
string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: _col1 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Reducer 2 Local Work: Map Reduce Local Work Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string) + expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator @@ -77,12 +77,12 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) 2 _col0 (type: string) - Reducer 4 + Reducer 7 Local Work: Map Reduce Local Work Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: string) + expressions: VALUE._col0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator @@ -98,11 +98,11 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 6 <- Map 5 (SORT, 1) - Reducer 7 <- Reducer 6 (GROUP, 1) + Reducer 4 <- Map 3 (SORT, 1) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 5 + Map 3 Map Operator Tree: TableScan alias: src @@ -116,7 +116,7 @@ STAGE PLANS: sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Reducer 6 + Reducer 4 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -138,23 +138,19 @@ STAGE PLANS: 2 _col0 (type: string) outputColumnNames: _col2, _col3 input vertices: - 0 Reducer 4 - 2 Reducer 2 + 0 Reducer 2 + 2 Reducer 7 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col3 (type: string) - outputColumnNames: _col2, _col3 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 7 + value expressions: _col0 (type: bigint) + Reducer 5 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/spark/auto_join32.q.out b/ql/src/test/results/clientpositive/spark/auto_join32.q.out index fff4d54..28c022e 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join32.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join32.q.out @@ -36,7 +36,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: v @@ -57,10 +57,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: s @@ -79,26 +79,22 @@ STAGE PLANS: 1 name (type: string) 
outputColumnNames: _col0, _col8 input vertices: - 1 Map 1 + 1 Map 3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col8 (type: string) - outputColumnNames: _col0, _col8 + Group By Operator + aggregations: count(DISTINCT _col8) + keys: _col0 (type: string), _col8 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count(DISTINCT _col8) - keys: _col0 (type: string), _col8 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Local Work: Map Reduce Local Work - Reducer 3 + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0) @@ -197,18 +193,15 @@ STAGE PLANS: 0 name (type: string) 1 name (type: string) outputColumnNames: _col0, _col8 - Select Operator - expressions: _col0 (type: string), _col8 (type: string) - outputColumnNames: _col0, _col8 - Group By Operator - aggregations: count(DISTINCT _col8) - keys: _col0 (type: string), _col8 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) + Group By Operator + aggregations: count(DISTINCT _col8) + keys: _col0 (type: string), _col8 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) Reducer 2 Reduce Operator Tree: Group By Operator @@ -319,18 +312,15 @@ STAGE PLANS: 0 name (type: string) 1 name (type: string) outputColumnNames: _col0, _col8 - Select Operator - expressions: _col0 (type: string), _col8 (type: string) - outputColumnNames: _col0, _col8 - Group By Operator - aggregations: count(DISTINCT _col8) - keys: _col0 (type: string), _col8 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) + Group By Operator + aggregations: count(DISTINCT _col8) + keys: _col0 (type: string), _col8 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) Reducer 2 Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/spark/auto_join9.q.out b/ql/src/test/results/clientpositive/spark/auto_join9.q.out index addbcc2..f7c6948 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join9.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join9.q.out @@ -25,7 +25,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: src2 @@ -47,7 +47,7 @@ STAGE PLANS: Spark #### 
A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src1 @@ -66,7 +66,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col8 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), _col8 (type: string) diff --git a/ql/src/test/results/clientpositive/spark/auto_join_reordering_values.q.out b/ql/src/test/results/clientpositive/spark/auto_join_reordering_values.q.out index 79f6a1a..ea77e79 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join_reordering_values.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join_reordering_values.q.out @@ -178,7 +178,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: dim_pay_date @@ -247,7 +247,7 @@ STAGE PLANS: name: default.orderpayment_small Truncated Path -> Alias: /orderpayment_small [dim_pay_date] - Map 2 + Map 3 Map Operator Tree: TableScan alias: deal @@ -316,7 +316,7 @@ STAGE PLANS: name: default.orderpayment_small Truncated Path -> Alias: /orderpayment_small [deal] - Map 3 + Map 4 Map Operator Tree: TableScan alias: order_city @@ -390,7 +390,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 4 + Map 1 Map Operator Tree: TableScan alias: orderpayment @@ -411,7 +411,7 @@ STAGE PLANS: 1 date (type: string) outputColumnNames: _col0, _col3, _col4, _col9 input vertices: - 1 Map 1 + 1 Map 2 Position of Big Table: 0 Statistics: Num rows: 1 Data size: 39 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -425,7 +425,7 @@ STAGE PLANS: 1 dealid (type: int) outputColumnNames: _col3, _col4, _col9, _col16 input vertices: - 1 Map 2 + 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 1 Data size: 42 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -439,7 +439,7 @@ STAGE PLANS: 1 cityid (type: int) outputColumnNames: _col4, _col9, _col16 input vertices: - 1 Map 3 + 1 Map 4 Position of Big Table: 0 Statistics: Num rows: 1 Data size: 46 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator @@ -527,7 +527,7 @@ STAGE PLANS: 1 userid (type: int) outputColumnNames: _col9, _col16 input vertices: - 0 Map 4 + 0 Map 1 Position of Big Table: 1 Statistics: Num rows: 55 Data size: 158 Basic stats: COMPLETE Column stats: NONE Select Operator diff --git a/ql/src/test/results/clientpositive/spark/auto_join_without_localtask.q.out b/ql/src/test/results/clientpositive/spark/auto_join_without_localtask.q.out index 26d2165..954d3d2 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join_without_localtask.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join_without_localtask.q.out @@ -19,7 +19,7 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -29,10 +29,11 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Map 4 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -42,7 +43,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 
250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) Reducer 2 Reduce Operator Tree: Join Operator @@ -53,14 +53,10 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: Select Operator @@ -147,51 +143,51 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 6 (PARTITION-LEVEL SORT, 3) - Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 3), Reducer 2 (PARTITION-LEVEL SORT, 3) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 5 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 3), Reducer 2 (PARTITION-LEVEL SORT, 3) Reducer 4 <- Reducer 3 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Map 5 Map Operator Tree: TableScan - alias: c + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: value is not null (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: value (type: string) + key expressions: key (type: string) sort order: + - Map-reduce partition columns: value (type: string) + Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Map 6 Map Operator Tree: TableScan - alias: a + alias: c Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + predicate: value is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: key (type: string) + key expressions: value (type: string) sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -218,14 +214,10 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE Reducer 4 Reduce Operator Tree: Select Operator @@ -312,13 +304,27 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 5 (PARTITION-LEVEL SORT, 3) - Reducer 3 <- Map 4 (PARTITION-LEVEL SORT, 3), Reducer 2 (PARTITION-LEVEL SORT, 3) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 3), Reducer 2 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key is not null and value is not null) and (key > 100)) (type: boolean) + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 + Map Operator Tree: + TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -329,7 +335,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Map 4 + Map 5 Map Operator Tree: TableScan alias: c @@ -342,20 +348,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: value (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map 5 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key is not null and value is not null) and (key > 100)) (type: boolean) - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) Reducer 2 Reduce Operator Tree: Join Operator diff --git a/ql/src/test/results/clientpositive/spark/auto_smb_mapjoin_14.q.out b/ql/src/test/results/clientpositive/spark/auto_smb_mapjoin_14.q.out index c66b49d..cb4e07d 100644 --- a/ql/src/test/results/clientpositive/spark/auto_smb_mapjoin_14.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_smb_mapjoin_14.q.out @@ -82,14 +82,13 @@ STAGE PLANS: keys: 0 key (type: int) 1 key (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By 
Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator @@ -179,19 +178,16 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Group By Operator - aggregations: count() - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: bigint) + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator @@ -323,19 +319,16 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Group By Operator - aggregations: count() - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: bigint) + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Map 4 Map Operator Tree: TableScan @@ -354,19 +347,16 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Group By Operator - aggregations: count() - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: bigint) + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator @@ -374,14 +364,11 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: int), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: bigint) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Reducer 3 Reduce Operator Tree: Join Operator @@ -407,14 +394,11 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: int), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - value 
expressions: _col1 (type: bigint) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Stage: Stage-0 Fetch Operator @@ -516,14 +500,13 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator @@ -625,14 +608,13 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 key (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator @@ -758,14 +740,13 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator @@ -881,14 +862,13 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator @@ -1010,17 +990,15 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reducer 2 @@ -1113,14 +1091,13 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 key (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator @@ -1217,14 +1194,13 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) 
2 _col0 (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator @@ -1334,14 +1310,13 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 key (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator @@ -1472,16 +1447,13 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 Stage: Stage-0 Move Operator @@ -1660,32 +1632,26 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: int), _col1 (type: string) + expressions: _col0 (type: int) + outputColumnNames: _col0 + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Group By Operator - aggregations: count() - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: bigint) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_1.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_1.q.out index c2293e7..2e35c66 100644 --- 
a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_1.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_1.q.out @@ -172,16 +172,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 1 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -405,16 +404,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 0 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -638,16 +636,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 0 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_10.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_10.q.out index 382c340..8472df9 100644 --- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_10.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_10.q.out @@ -135,17 +135,15 @@ STAGE PLANS: 0 1 Statistics: Num rows: 6 Data size: 46 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 6 Data size: 46 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Reducer 4 Reduce Operator Tree: Group By Operator @@ -238,23 +236,19 @@ STAGE PLANS: Filter Operator predicate: (key < 6) (type: boolean) Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: key + Group By Operator + aggregations: count() + bucketGroup: true + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - 
Group By Operator - aggregations: count() - bucketGroup: true - keys: key (type: int) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reducer 2 Local Work: Map Reduce Local Work @@ -307,17 +301,15 @@ STAGE PLANS: input vertices: 0 Reducer 2 Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reducer 4 diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_11.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_11.q.out index e869487..4821996 100644 --- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_11.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_11.q.out @@ -147,7 +147,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: a @@ -223,10 +223,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -246,22 +246,20 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) input vertices: - 0 Map 3 + 0 Map 1 Position of Big Table: 1 Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -364,7 +362,7 @@ STAGE PLANS: Truncated Path -> Alias: /bucket_big/ds=2008-04-08 [b] /bucket_big/ds=2008-04-09 [b] - Reducer 2 + Reducer 3 Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -475,7 +473,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map 
Operator Tree: TableScan alias: a @@ -551,10 +549,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -574,22 +572,20 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) input vertices: - 0 Map 3 + 0 Map 1 Position of Big Table: 1 Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -692,7 +688,7 @@ STAGE PLANS: Truncated Path -> Alias: /bucket_big/ds=2008-04-08 [b] /bucket_big/ds=2008-04-09 [b] - Reducer 2 + Reducer 3 Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -802,7 +798,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: a @@ -878,10 +874,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -901,22 +897,20 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) input vertices: - 0 Map 3 + 0 Map 1 Position of Big Table: 1 Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -1019,7 +1013,7 @@ STAGE PLANS: Truncated Path -> Alias: /bucket_big/ds=2008-04-08 [b] /bucket_big/ds=2008-04-09 [b] - Reducer 2 + Reducer 3 Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -1144,16 +1138,16 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column 
stats: NONE + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: 0 @@ -1178,51 +1172,78 @@ STAGE PLANS: ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true - bucket_count 4 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big - numFiles 4 + name default.bucket_small + numFiles 2 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - bucket_count 4 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big + name default.bucket_small partition_columns ds partition_columns.types string - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_big - name: default.bucket_big + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [a] + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + 2 + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Position of Big Table: 1 + Local Work: + Map Reduce Local Work + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 + base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-09 + ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true bucket_count 4 @@ -1263,91 +1284,64 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [c] - /bucket_big/ds=2008-04-09 [c] - Map 4 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - 2 - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - Position of Big Table: 1 - Local Work: - Map Reduce Local Work - Path -> Alias: -#### A 
masked pattern was here #### - Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 + base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-08 + ds 2008-04-09 properties: COLUMN_STATS_ACCURATE true - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small - numFiles 2 + name default.bucket_big + numFiles 4 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 114 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small + name default.bucket_big partition_columns ds partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small + name: default.bucket_big + name: default.bucket_big Truncated Path -> Alias: - /bucket_small/ds=2008-04-08 [a] + /bucket_big/ds=2008-04-08 [c] + /bucket_big/ds=2008-04-09 [c] Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -1370,23 +1364,21 @@ STAGE PLANS: 1 key (type: string) 2 key (type: string) input vertices: - 0 Map 4 - 2 Map 3 + 0 Map 1 + 2 Map 4 Position of Big Table: 1 Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -1489,7 +1481,7 @@ STAGE PLANS: Truncated Path -> Alias: /bucket_big/ds=2008-04-08 [b] /bucket_big/ds=2008-04-09 [b] - Reducer 2 + Reducer 3 Needs Tagging: false Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_12.q.out 
b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_12.q.out index ed86214..f3a8ba6 100644 --- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_12.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_12.q.out @@ -203,29 +203,34 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage + Stage-3 is a root stage + Stage-2 depends on stages: Stage-3 Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: d - Statistics: Num rows: 0 Data size: 170 Basic stats: PARTIAL Column stats: NONE + alias: a + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - keys: - 0 - 1 - Position of Big Table: 0 + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 {key} + keys: + 0 key (type: string) + 1 key (type: string) + Position of Big Table: 1 Local Work: Map Reduce Local Work Path -> Alias: @@ -240,22 +245,22 @@ STAGE PLANS: ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true - bucket_count 3 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_medium - numFiles 3 + name default.bucket_small + numFiles 2 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_medium { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 170 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -263,24 +268,29 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 3 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_medium + name default.bucket_small partition_columns ds partition_columns.types string - serialization.ddl struct bucket_medium { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_medium - name: default.bucket_medium + name: default.bucket_small + name: default.bucket_small Truncated Path -> Alias: - /bucket_medium/ds=2008-04-08 [d] + /bucket_small/ds=2008-04-08 [a] + + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: Map 2 Map Operator Tree: TableScan @@ -291,16 +301,28 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator + Map Join Operator + condition map: + Inner Join 0 to 1 condition expressions: 0 - 1 - 2 + 1 {key} keys: 0 key (type: string) 1 key (type: string) - 2 key (type: string) - Position of Big Table: 2 + outputColumnNames: _col6 + input vertices: + 0 
Map 1 + Position of Big Table: 1 + Statistics: Num rows: 1 Data size: 125 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 _col6 (type: string), _col6 (type: string) + 1 key (type: string), key (type: string) + Position of Big Table: 1 Local Work: Map Reduce Local Work Path -> Alias: @@ -359,23 +381,17 @@ STAGE PLANS: Map 5 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + alias: d + Statistics: Num rows: 0 Data size: 170 Basic stats: PARTIAL Column stats: NONE GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - 2 - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - Position of Big Table: 2 + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 + 1 + Position of Big Table: 0 Local Work: Map Reduce Local Work Path -> Alias: @@ -390,22 +406,22 @@ STAGE PLANS: ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true - bucket_count 2 + bucket_count 3 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small - numFiles 2 + name default.bucket_medium + numFiles 3 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_medium { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 114 + totalSize 170 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -413,24 +429,24 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 3 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small + name default.bucket_medium partition_columns ds partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_medium { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small + name: default.bucket_medium + name: default.bucket_medium Truncated Path -> Alias: - /bucket_small/ds=2008-04-08 [a] + /bucket_medium/ds=2008-04-08 [d] Stage: Stage-1 Spark @@ -451,20 +467,16 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 condition expressions: 0 1 - 2 keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) + 0 _col6 (type: string), _col6 (type: string) + 1 key (type: string), key (type: string) input vertices: - 0 Map 5 - 1 Map 2 - Position of Big Table: 2 - Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE + 0 Map 2 + Position of Big Table: 1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -475,22 +487,20 @@ STAGE PLANS: 0 1 input vertices: - 1 Map 1 + 1 Map 5 
Position of Big Table: 0 - Statistics: Num rows: 139 Data size: 14064 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 139 Data size: 14064 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Statistics: Num rows: 69 Data size: 7032 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_14.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_14.q.out index 421d3e5..20ee657 100644 --- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_14.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_14.q.out @@ -67,14 +67,13 @@ STAGE PLANS: keys: 0 key (type: int) 1 key (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator @@ -151,14 +150,13 @@ STAGE PLANS: keys: 0 key (type: int) 1 key (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_15.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_15.q.out index 6706def..0a48d00 100644 --- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_15.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_15.q.out @@ -65,14 +65,13 @@ STAGE PLANS: keys: 0 key (type: int) 1 key (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator @@ -126,14 +125,13 @@ STAGE PLANS: keys: 0 key (type: int) 1 key (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: Group By 
Operator diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_2.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_2.q.out index 392218a..5008a3f 100644 --- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_2.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_2.q.out @@ -152,16 +152,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 0 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -387,16 +386,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 0 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_3.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_3.q.out index cd2e3b1..3b081af 100644 --- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_3.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_3.q.out @@ -152,16 +152,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 1 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -336,16 +335,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 0 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -520,16 +518,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 0 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto 
parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_4.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_4.q.out index 48a963c..2a11fb2 100644 --- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_4.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_4.q.out @@ -168,16 +168,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 1 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -352,16 +351,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 0 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -536,16 +534,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 0 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_5.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_5.q.out index 05493b5..0d971d2 100644 --- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_5.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_5.q.out @@ -133,16 +133,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 1 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -307,16 +306,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 0 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + 
sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -481,16 +479,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 0 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_6.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_6.q.out index 731dd1a..9d455dc 100644 --- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_6.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_6.q.out @@ -103,19 +103,6 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map 4 - Map Operator Tree: - TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -135,6 +122,19 @@ STAGE PLANS: key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -143,14 +143,13 @@ STAGE PLANS: condition expressions: 0 1 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -211,19 +210,6 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: d - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map 4 - Map Operator Tree: - TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic 
stats: COMPLETE Column stats: NONE Filter Operator @@ -243,6 +229,19 @@ STAGE PLANS: key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: d + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -251,14 +250,13 @@ STAGE PLANS: condition expressions: 0 1 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -359,14 +357,13 @@ STAGE PLANS: condition expressions: 0 1 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -423,7 +420,7 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: UDFToDouble(key) is not null (type: boolean) @@ -436,7 +433,7 @@ STAGE PLANS: Map 4 Map Operator Tree: TableScan - alias: c + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: UDFToDouble(key) is not null (type: boolean) @@ -449,7 +446,7 @@ STAGE PLANS: Map 5 Map Operator Tree: TableScan - alias: a + alias: c Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: UDFToDouble(key) is not null (type: boolean) @@ -470,17 +467,15 @@ STAGE PLANS: 1 2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -556,14 +551,13 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) 2 key (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() 
+ mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator @@ -620,19 +614,6 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map 4 - Map Operator Tree: - TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -652,6 +633,19 @@ STAGE PLANS: key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -660,14 +654,13 @@ STAGE PLANS: condition expressions: 0 1 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -724,19 +717,6 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map 4 - Map Operator Tree: - TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -756,6 +736,19 @@ STAGE PLANS: key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -764,14 +757,13 @@ STAGE PLANS: condition expressions: 0 1 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output 
Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -828,7 +820,7 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: UDFToDouble(key) is not null (type: boolean) @@ -841,7 +833,7 @@ STAGE PLANS: Map 4 Map Operator Tree: TableScan - alias: c + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: UDFToDouble(key) is not null (type: boolean) @@ -854,7 +846,7 @@ STAGE PLANS: Map 5 Map Operator Tree: TableScan - alias: a + alias: c Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: UDFToDouble(key) is not null (type: boolean) @@ -875,17 +867,15 @@ STAGE PLANS: 1 2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -961,14 +951,13 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) 2 key (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator @@ -1025,19 +1014,6 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map 4 - Map Operator Tree: - TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -1057,6 +1033,19 @@ STAGE PLANS: key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce 
partition columns: value (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -1065,14 +1054,13 @@ STAGE PLANS: condition expressions: 0 1 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_7.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_7.q.out index f9a184b..61eb6ae 100644 --- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_7.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_7.q.out @@ -185,16 +185,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 1 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -420,16 +419,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 0 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -655,16 +653,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 0 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_8.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_8.q.out index c8a97d2..198d50d 100644 --- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_8.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_8.q.out @@ -185,16 +185,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 1 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: 
_col0 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -420,16 +419,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 0 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -657,16 +655,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 0 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_9.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_9.q.out index 4853aee..f59e57f 100644 --- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_9.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_9.q.out @@ -82,14 +82,13 @@ STAGE PLANS: keys: 0 key (type: int) 1 key (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator @@ -172,19 +171,16 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Group By Operator - aggregations: count() - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: bigint) + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator @@ -284,19 +280,16 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Group By Operator - aggregations: count() - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: bigint) + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort 
order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator @@ -428,19 +421,16 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Group By Operator - aggregations: count() - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: bigint) + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Map 4 Map Operator Tree: TableScan @@ -459,19 +449,16 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Group By Operator - aggregations: count() - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: bigint) + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator @@ -479,14 +466,11 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: int), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: bigint) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Reducer 3 Reduce Operator Tree: Join Operator @@ -512,14 +496,11 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: int), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: bigint) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Stage: Stage-0 Fetch Operator @@ -621,14 +602,13 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator @@ -730,14 +710,13 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 key (type: int) - Select Operator - Group By Operator - aggregations: count() - 
mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator @@ -863,14 +842,13 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator @@ -986,14 +964,13 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator @@ -1115,17 +1092,15 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reducer 2 @@ -1218,14 +1193,13 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 key (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator @@ -1307,14 +1281,13 @@ STAGE PLANS: keys: 0 key (type: int) 1 _col0 (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator @@ -1413,14 +1386,13 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) 2 _col0 (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reducer 
2 Reduce Operator Tree: Group By Operator @@ -1530,14 +1502,13 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 key (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator @@ -1631,14 +1602,13 @@ STAGE PLANS: keys: 0 key (type: int) 1 key (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator @@ -1721,19 +1691,16 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Group By Operator - aggregations: count() - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: bigint) + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator @@ -1833,19 +1800,16 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Group By Operator - aggregations: count() - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: bigint) + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator @@ -1977,19 +1941,16 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Group By Operator - aggregations: count() - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: bigint) + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Map 4 Map Operator Tree: TableScan @@ -2008,19 +1969,16 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: int) - outputColumnNames: 
_col0 - Group By Operator - aggregations: count() - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: bigint) + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator @@ -2028,14 +1986,11 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: int), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: bigint) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Reducer 3 Reduce Operator Tree: Join Operator @@ -2061,14 +2016,11 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: int), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: bigint) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Stage: Stage-0 Fetch Operator @@ -2170,14 +2122,13 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator @@ -2279,14 +2230,13 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 key (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator @@ -2412,14 +2362,13 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator @@ -2535,14 +2484,13 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By 
Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator @@ -2630,14 +2578,13 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 key (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator @@ -2719,14 +2666,13 @@ STAGE PLANS: keys: 0 key (type: int) 1 _col0 (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator @@ -2825,14 +2771,13 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) 2 _col0 (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator @@ -2942,14 +2887,13 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 key (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/spark/bucket_map_join_1.q.out b/ql/src/test/results/clientpositive/spark/bucket_map_join_1.q.out index 718edd9..835ee60 100644 --- a/ql/src/test/results/clientpositive/spark/bucket_map_join_1.q.out +++ b/ql/src/test/results/clientpositive/spark/bucket_map_join_1.q.out @@ -113,7 +113,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: b @@ -190,10 +190,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -213,22 +213,20 @@ STAGE PLANS: 0 key (type: string), value (type: string) 1 key (type: string), value (type: string) input vertices: - 1 Map 1 + 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output 
Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -284,7 +282,7 @@ STAGE PLANS: name: default.table1 Truncated Path -> Alias: /table1 [a] - Reducer 3 + Reducer 2 Needs Tagging: false Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/spark/bucket_map_join_2.q.out b/ql/src/test/results/clientpositive/spark/bucket_map_join_2.q.out index 4687e03..73ba0b4 100644 --- a/ql/src/test/results/clientpositive/spark/bucket_map_join_2.q.out +++ b/ql/src/test/results/clientpositive/spark/bucket_map_join_2.q.out @@ -113,7 +113,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: b @@ -190,10 +190,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -213,22 +213,20 @@ STAGE PLANS: 0 key (type: string), value (type: string) 1 key (type: string), value (type: string) input vertices: - 1 Map 1 + 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -284,7 +282,7 @@ STAGE PLANS: name: default.table1 Truncated Path -> Alias: /table1 [a] - Reducer 3 + Reducer 2 Needs Tagging: false Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out b/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out index b0a81e8..92c1fb7 100644 --- a/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out +++ b/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out @@ -126,7 +126,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -148,7 +148,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -167,7 +167,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1, _col7 input vertices: - 0 Map 2 + 0 Map 1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) @@ -212,10 +212,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 3) + Reducer 2 <- Map 1 (GROUP, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: srcbucket_mapjoin @@ -223,23 +223,19 @@ 
STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: key, value + Group By Operator + aggregations: sum(substr(value, 5)) + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(substr(value, 5)) - keys: key (type: int) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) - Reducer 3 + value expressions: _col1 (type: double) + Reducer 2 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -265,7 +261,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: b @@ -284,7 +280,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1, _col3 input vertices: - 0 Reducer 3 + 0 Reducer 2 Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: int), _col0 (type: double), _col3 (type: string) @@ -327,7 +323,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: b @@ -348,10 +344,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 3) + Reducer 2 <- Map 1 (GROUP, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: tab_part @@ -370,27 +366,23 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col6, _col7 input vertices: - 1 Map 4 + 1 Map 3 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col6 (type: int), _col7 (type: string) - outputColumnNames: _col6, _col7 + Group By Operator + aggregations: sum(substr(_col7, 5)) + keys: _col6 (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(substr(_col7, 5)) - keys: _col6 (type: int) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) + value expressions: _col1 (type: double) Local Work: Map Reduce Local Work - Reducer 3 + Reducer 2 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -415,7 +407,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1, _col3 input vertices: - 1 Map 1 + 1 Map 4 Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: int), _col0 (type: 
double), _col3 (type: string) @@ -433,7 +425,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 4 + Map 3 Map Operator Tree: TableScan alias: tab @@ -478,7 +470,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 4 + Map 3 Map Operator Tree: TableScan alias: y @@ -499,10 +491,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 3) + Reducer 2 <- Map 1 (GROUP, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: x @@ -521,27 +513,23 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1 input vertices: - 1 Map 4 + 1 Map 3 Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string) + Group By Operator + aggregations: sum(substr(_col1, 5)) + keys: _col0 (type: int) + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(substr(_col1, 5)) - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) + value expressions: _col1 (type: double) Local Work: Map Reduce Local Work - Reducer 3 + Reducer 2 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -567,7 +555,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: b @@ -586,7 +574,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1, _col3 input vertices: - 0 Reducer 3 + 0 Reducer 2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: int), _col0 (type: double), _col3 (type: string) @@ -628,7 +616,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -647,7 +635,7 @@ STAGE PLANS: 2 key (type: int) Local Work: Map Reduce Local Work - Map 2 + Map 3 Map Operator Tree: TableScan alias: c @@ -671,7 +659,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: a @@ -693,8 +681,8 @@ STAGE PLANS: 2 key (type: int) outputColumnNames: _col0, _col1, _col7 input vertices: - 1 Map 1 - 2 Map 2 + 1 Map 2 + 2 Map 3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) @@ -734,37 +722,37 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: c + alias: y Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {_col0} {_col1} - 1 {value} + 0 {key} {value} + 1 keys: - 0 _col0 (type: int) + 0 key (type: int) 1 key (type: int) Local Work: Map Reduce Local Work Map 3 
Map Operator Tree: TableScan - alias: y + alias: c Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {key} {value} - 1 + 0 {_col0} {_col1} + 1 {value} keys: - 0 key (type: int) + 0 _col0 (type: int) 1 key (type: int) Local Work: Map Reduce Local Work @@ -773,7 +761,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: x @@ -792,36 +780,32 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1 input vertices: - 1 Map 3 + 1 Map 2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} - 1 {value} - keys: - 0 _col0 (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 1 Map 1 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {value} + keys: + 0 _col0 (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 1 Map 3 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work @@ -852,10 +836,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 3) + Reducer 2 <- Map 1 (GROUP, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: srcbucket_mapjoin @@ -863,23 +847,19 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: key, value + Group By Operator + aggregations: sum(substr(value, 5)) + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(substr(value, 5)) - keys: key (type: int) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: 
+ + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) - Reducer 3 + value expressions: _col1 (type: double) + Reducer 2 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -888,24 +868,20 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 728 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 728 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col1} - 1 {value} - keys: - 0 _col0 (type: int) - 1 key (type: int) + Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col1} + 1 {value} + keys: + 0 _col0 (type: int) + 1 key (type: int) Stage: Stage-1 Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: b @@ -924,7 +900,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1, _col3 input vertices: - 0 Reducer 3 + 0 Reducer 2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: double), _col3 (type: string) @@ -965,10 +941,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 3) + Reducer 2 <- Map 1 (GROUP, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: srcbucket_mapjoin @@ -976,17 +952,13 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: key, value + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE - value expressions: substr(value, 5) (type: string) - Reducer 3 + value expressions: substr(value, 5) (type: string) + Reducer 2 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -995,24 +967,20 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 728 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 728 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col1} - 1 {value} - keys: - 0 _col0 (type: int) - 1 key (type: int) + Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col1} + 1 {value} + keys: + 0 _col0 (type: int) + 1 key (type: int) Stage: Stage-1 Spark #### A masked 
pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: b @@ -1031,7 +999,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1, _col3 input vertices: - 0 Reducer 3 + 0 Reducer 2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: double), _col3 (type: string) @@ -1073,7 +1041,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -1095,7 +1063,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -1114,7 +1082,7 @@ STAGE PLANS: 1 value (type: string) outputColumnNames: _col0, _col1, _col7 input vertices: - 0 Map 2 + 0 Map 1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) @@ -1176,7 +1144,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -1198,7 +1166,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -1217,7 +1185,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1, _col6 input vertices: - 0 Map 2 + 0 Map 1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) @@ -1256,35 +1224,35 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {_col0} - 1 {key} + 0 {value} + 1 keys: - 0 _col1 (type: string) - 1 value (type: string) + 0 key (type: int) + 1 key (type: int) Local Work: Map Reduce Local Work Map 3 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + predicate: value is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {value} - 1 + 0 {_col0} + 1 {key} keys: - 0 key (type: int) - 1 key (type: int) + 0 _col1 (type: string) + 1 value (type: string) Local Work: Map Reduce Local Work @@ -1311,7 +1279,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1 input vertices: - 0 Map 3 + 0 Map 1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -1324,7 +1292,7 @@ STAGE PLANS: 1 value (type: string) outputColumnNames: _col0, _col12 input vertices: - 1 Map 1 + 1 Map 3 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col12 (type: int) diff --git a/ql/src/test/results/clientpositive/spark/bucket_map_join_tez2.q.out 
b/ql/src/test/results/clientpositive/spark/bucket_map_join_tez2.q.out index 6d9cc20..c95ff74 100644 --- a/ql/src/test/results/clientpositive/spark/bucket_map_join_tez2.q.out +++ b/ql/src/test/results/clientpositive/spark/bucket_map_join_tez2.q.out @@ -125,35 +125,35 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {_col0} - 1 {key} + 0 {value} + 1 keys: - 0 _col1 (type: string) - 1 value (type: string) + 0 key (type: int) + 1 key (type: int) Local Work: Map Reduce Local Work Map 3 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + predicate: value is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {value} - 1 + 0 {_col0} + 1 {key} keys: - 0 key (type: int) - 1 key (type: int) + 0 _col1 (type: string) + 1 value (type: string) Local Work: Map Reduce Local Work @@ -180,7 +180,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1 input vertices: - 0 Map 3 + 0 Map 1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -193,7 +193,7 @@ STAGE PLANS: 1 value (type: string) outputColumnNames: _col0, _col12 input vertices: - 1 Map 1 + 1 Map 3 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col12 (type: int) @@ -255,7 +255,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -277,7 +277,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -296,7 +296,7 @@ STAGE PLANS: 1 UDFToDouble(key) (type: double) outputColumnNames: _col0, _col1, _col6 input vertices: - 0 Map 2 + 0 Map 1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) @@ -591,91 +591,83 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Spark - Edges: - Reducer 3 <- Map 2 (GROUP, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan - alias: tab + alias: b Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: key - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: key (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: 
int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Reducer 3 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} + 1 + keys: + 0 _col0 (type: int) + 1 key (type: int) Local Work: Map Reduce Local Work - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 60 Data size: 636 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 60 Data size: 636 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 {key} - keys: - 0 _col0 (type: int) - 1 key (type: int) Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 3) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: b + alias: tab Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} - 1 {key} - keys: - 0 _col0 (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 0 Reducer 3 - Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Group By Operator + keys: key (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + Reducer 2 Local Work: Map Reduce Local Work + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 {key} + keys: + 0 _col0 (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -695,91 +687,83 @@ STAGE DEPENDENCIES: 
STAGE PLANS: Stage: Stage-2 Spark - Edges: - Reducer 3 <- Map 2 (GROUP, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan - alias: tab + alias: b Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: UDFToDouble(value) is not null (type: boolean) + predicate: UDFToDouble(key) is not null (type: boolean) Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string) - outputColumnNames: value - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: value (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Reducer 3 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} + 1 {value} + keys: + 0 UDFToDouble(_col0) (type: double) + 1 UDFToDouble(key) (type: double) Local Work: Map Reduce Local Work - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 60 Data size: 636 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 60 Data size: 636 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} - 1 {value} - keys: - 0 UDFToDouble(_col0) (type: double) - 1 UDFToDouble(key) (type: double) Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 3) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: b + alias: tab Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: UDFToDouble(key) is not null (type: boolean) + predicate: UDFToDouble(value) is not null (type: boolean) Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} - 1 {value} - keys: - 0 UDFToDouble(_col0) (type: double) - 1 UDFToDouble(key) (type: double) - outputColumnNames: _col0, _col2 - input vertices: - 0 Reducer 3 - Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Group By Operator + keys: value (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + Reducer 2 Local Work: Map Reduce 
Local Work + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 {value} + keys: + 0 UDFToDouble(_col0) (type: double) + 1 UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col2 + input vertices: + 1 Map 3 + Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/bucketmapjoin1.q.out b/ql/src/test/results/clientpositive/spark/bucketmapjoin1.q.out index 6b662b2..c6e1b97 100644 --- a/ql/src/test/results/clientpositive/spark/bucketmapjoin1.q.out +++ b/ql/src/test/results/clientpositive/spark/bucketmapjoin1.q.out @@ -100,7 +100,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Local Work: Map Reduce Local Work @@ -108,7 +108,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Local Work: Map Reduce Local Work @@ -208,7 +208,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Local Work: Map Reduce Local Work @@ -216,7 +216,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Local Work: Map Reduce Local Work @@ -410,7 +410,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -482,7 +482,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -503,7 +503,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1, _col6 input vertices: - 0 Map 2 + 0 Map 1 Position of Big Table: 1 Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -798,7 +798,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -870,7 +870,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -891,7 +891,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1, _col6 input vertices: - 0 Map 2 + 0 Map 1 Position of Big Table: 1 Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE Select Operator diff --git a/ql/src/test/results/clientpositive/spark/bucketmapjoin10.q.out b/ql/src/test/results/clientpositive/spark/bucketmapjoin10.q.out index a3bfeaf..eda8ca2 100644 --- a/ql/src/test/results/clientpositive/spark/bucketmapjoin10.q.out +++ b/ql/src/test/results/clientpositive/spark/bucketmapjoin10.q.out @@ -201,7 +201,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: b @@ -325,10 +325,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 1) + 
Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -348,22 +348,20 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) input vertices: - 1 Map 1 + 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 955 Data size: 3824 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 955 Data size: 3824 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -466,7 +464,7 @@ STAGE PLANS: Truncated Path -> Alias: /srcbucket_mapjoin_part_1/part=1 [a] /srcbucket_mapjoin_part_1/part=2 [a] - Reducer 3 + Reducer 2 Needs Tagging: false Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/spark/bucketmapjoin11.q.out b/ql/src/test/results/clientpositive/spark/bucketmapjoin11.q.out index 7806e63..bb7214c 100644 --- a/ql/src/test/results/clientpositive/spark/bucketmapjoin11.q.out +++ b/ql/src/test/results/clientpositive/spark/bucketmapjoin11.q.out @@ -211,7 +211,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: b @@ -335,10 +335,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -358,22 +358,20 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) input vertices: - 1 Map 1 + 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 1177 Data size: 4709 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1177 Data size: 4709 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -476,7 +474,7 @@ STAGE PLANS: Truncated Path -> Alias: /srcbucket_mapjoin_part_1/part=1 [a] /srcbucket_mapjoin_part_1/part=2 [a] - Reducer 3 + Reducer 2 Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -620,7 +618,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: b @@ -744,10 +742,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here 
#### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -767,22 +765,20 @@ STAGE PLANS: 0 key (type: int), part (type: string) 1 key (type: int), part (type: string) input vertices: - 1 Map 1 + 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 1177 Data size: 4709 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1177 Data size: 4709 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -885,7 +881,7 @@ STAGE PLANS: Truncated Path -> Alias: /srcbucket_mapjoin_part_1/part=1 [a] /srcbucket_mapjoin_part_1/part=2 [a] - Reducer 3 + Reducer 2 Needs Tagging: false Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/spark/bucketmapjoin12.q.out b/ql/src/test/results/clientpositive/spark/bucketmapjoin12.q.out index 5d4450a..c0adef4 100644 --- a/ql/src/test/results/clientpositive/spark/bucketmapjoin12.q.out +++ b/ql/src/test/results/clientpositive/spark/bucketmapjoin12.q.out @@ -170,7 +170,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: b @@ -245,10 +245,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -268,22 +268,20 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) input vertices: - 1 Map 1 + 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -338,7 +336,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part_1 Truncated Path -> Alias: /srcbucket_mapjoin_part_1/part=1 [a] - Reducer 3 + Reducer 2 Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -472,7 +470,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: b @@ -547,10 +545,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: - 
Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -570,22 +568,20 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) input vertices: - 1 Map 1 + 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -640,7 +636,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part_1 Truncated Path -> Alias: /srcbucket_mapjoin_part_1/part=1 [a] - Reducer 3 + Reducer 2 Needs Tagging: false Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/spark/bucketmapjoin13.q.out b/ql/src/test/results/clientpositive/spark/bucketmapjoin13.q.out index f2eb1e9..98d0706 100644 --- a/ql/src/test/results/clientpositive/spark/bucketmapjoin13.q.out +++ b/ql/src/test/results/clientpositive/spark/bucketmapjoin13.q.out @@ -129,7 +129,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: b @@ -205,10 +205,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -228,22 +228,20 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) input vertices: - 1 Map 1 + 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -346,7 +344,7 @@ STAGE PLANS: Truncated Path -> Alias: /srcbucket_mapjoin_part_1/part=1 [a] /srcbucket_mapjoin_part_1/part=2 [a] - Reducer 3 + Reducer 2 Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -475,7 +473,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: b @@ -551,10 +549,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -574,22 
+572,20 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) input vertices: - 1 Map 1 + 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -644,7 +640,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part_1 Truncated Path -> Alias: /srcbucket_mapjoin_part_1/part=2 [a] - Reducer 3 + Reducer 2 Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -772,7 +768,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: b @@ -848,10 +844,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -871,22 +867,20 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) input vertices: - 1 Map 1 + 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -941,7 +935,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part_1 Truncated Path -> Alias: /srcbucket_mapjoin_part_1/part=2 [a] - Reducer 3 + Reducer 2 Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -1071,7 +1065,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: b @@ -1147,10 +1141,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -1170,22 +1164,20 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) input vertices: - 1 Map 1 + 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash 
- outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -1240,7 +1232,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part_1 Truncated Path -> Alias: /srcbucket_mapjoin_part_1/part=2 [a] - Reducer 3 + Reducer 2 Needs Tagging: false Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/spark/bucketmapjoin2.q.out b/ql/src/test/results/clientpositive/spark/bucketmapjoin2.q.out index 4686047..c6fc8d5 100644 --- a/ql/src/test/results/clientpositive/spark/bucketmapjoin2.q.out +++ b/ql/src/test/results/clientpositive/spark/bucketmapjoin2.q.out @@ -168,7 +168,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -245,7 +245,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -266,7 +266,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1, _col7 input vertices: - 1 Map 1 + 1 Map 2 Position of Big Table: 0 Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -565,7 +565,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -642,7 +642,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -663,7 +663,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1, _col7 input vertices: - 1 Map 1 + 1 Map 2 Position of Big Table: 0 Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -984,7 +984,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -1061,7 +1061,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -1082,7 +1082,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1, _col7 input vertices: - 0 Map 2 + 0 Map 1 Position of Big Table: 1 Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE Select Operator diff --git a/ql/src/test/results/clientpositive/spark/bucketmapjoin3.q.out b/ql/src/test/results/clientpositive/spark/bucketmapjoin3.q.out index e0fb311..1ae9e64 100644 --- a/ql/src/test/results/clientpositive/spark/bucketmapjoin3.q.out +++ b/ql/src/test/results/clientpositive/spark/bucketmapjoin3.q.out @@ -199,7 +199,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -276,7 +276,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -297,7 +297,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1, _col7 input vertices: - 0 Map 2 + 0 Map 1 Position of Big Table: 1 Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: 
NONE Select Operator @@ -603,7 +603,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -680,7 +680,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -701,7 +701,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1, _col7 input vertices: - 0 Map 2 + 0 Map 1 Position of Big Table: 1 Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE Select Operator diff --git a/ql/src/test/results/clientpositive/spark/bucketmapjoin4.q.out b/ql/src/test/results/clientpositive/spark/bucketmapjoin4.q.out index 7374aa9..25c6c7b 100644 --- a/ql/src/test/results/clientpositive/spark/bucketmapjoin4.q.out +++ b/ql/src/test/results/clientpositive/spark/bucketmapjoin4.q.out @@ -185,7 +185,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -257,7 +257,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -278,7 +278,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1, _col6 input vertices: - 1 Map 1 + 1 Map 2 Position of Big Table: 0 Statistics: Num rows: 14 Data size: 1512 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -553,7 +553,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -625,7 +625,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -646,7 +646,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1, _col6 input vertices: - 1 Map 1 + 1 Map 2 Position of Big Table: 0 Statistics: Num rows: 14 Data size: 1512 Basic stats: COMPLETE Column stats: NONE Select Operator diff --git a/ql/src/test/results/clientpositive/spark/bucketmapjoin5.q.out b/ql/src/test/results/clientpositive/spark/bucketmapjoin5.q.out index b317001..94952a1 100644 --- a/ql/src/test/results/clientpositive/spark/bucketmapjoin5.q.out +++ b/ql/src/test/results/clientpositive/spark/bucketmapjoin5.q.out @@ -235,7 +235,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -307,7 +307,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -328,7 +328,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1, _col6 input vertices: - 0 Map 2 + 0 Map 1 Position of Big Table: 1 Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -668,7 +668,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -740,7 +740,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -761,7 +761,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1, _col6 input vertices: - 0 Map 2 + 0 Map 1 Position of Big Table: 1 Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE Select Operator diff --git a/ql/src/test/results/clientpositive/spark/bucketmapjoin7.q.out b/ql/src/test/results/clientpositive/spark/bucketmapjoin7.q.out index c7f1bb7..ca59d02 100644 --- a/ql/src/test/results/clientpositive/spark/bucketmapjoin7.q.out +++ 
b/ql/src/test/results/clientpositive/spark/bucketmapjoin7.q.out @@ -149,7 +149,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: b @@ -226,10 +226,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (SORT, 1) + Reducer 2 <- Map 1 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -250,7 +250,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col8 input vertices: - 1 Map 1 + 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -318,7 +318,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part_1 Truncated Path -> Alias: /srcbucket_mapjoin_part_1/ds=2008-04-08/hr=0 [a] - Reducer 3 + Reducer 2 Needs Tagging: false Reduce Operator Tree: Select Operator diff --git a/ql/src/test/results/clientpositive/spark/bucketmapjoin8.q.out b/ql/src/test/results/clientpositive/spark/bucketmapjoin8.q.out index 72d79c6..f419eaf 100644 --- a/ql/src/test/results/clientpositive/spark/bucketmapjoin8.q.out +++ b/ql/src/test/results/clientpositive/spark/bucketmapjoin8.q.out @@ -135,7 +135,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: b @@ -211,10 +211,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -234,22 +234,20 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) input vertices: - 1 Map 1 + 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -304,7 +302,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part_1 Truncated Path -> Alias: /srcbucket_mapjoin_part_1/part=1 [a] - Reducer 3 + Reducer 2 Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -446,7 +444,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: b @@ -522,10 +520,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -545,22 +543,20 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) input vertices: - 1 Map 1 + 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - 
outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -615,7 +611,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part_1 Truncated Path -> Alias: /srcbucket_mapjoin_part_1/part=1 [a] - Reducer 3 + Reducer 2 Needs Tagging: false Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/spark/bucketmapjoin9.q.out b/ql/src/test/results/clientpositive/spark/bucketmapjoin9.q.out index 105b215..8b3848d 100644 --- a/ql/src/test/results/clientpositive/spark/bucketmapjoin9.q.out +++ b/ql/src/test/results/clientpositive/spark/bucketmapjoin9.q.out @@ -143,7 +143,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: a @@ -219,10 +219,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -242,22 +242,20 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) input vertices: - 0 Map 3 + 0 Map 1 Position of Big Table: 1 Statistics: Num rows: 577 Data size: 2310 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 577 Data size: 2310 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -312,7 +310,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: /srcbucket_mapjoin_part_2/part=1 [b] - Reducer 2 + Reducer 3 Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -487,7 +485,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: b @@ -563,10 +561,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -586,22 +584,20 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) input vertices: - 1 Map 1 + 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: 
count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -656,7 +652,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part_1 Truncated Path -> Alias: /srcbucket_mapjoin_part_1/part=1 [a] - Reducer 3 + Reducer 2 Needs Tagging: false Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/spark/bucketmapjoin_negative.q.out b/ql/src/test/results/clientpositive/spark/bucketmapjoin_negative.q.out index c405b4f..4dbaaa2 100644 --- a/ql/src/test/results/clientpositive/spark/bucketmapjoin_negative.q.out +++ b/ql/src/test/results/clientpositive/spark/bucketmapjoin_negative.q.out @@ -143,7 +143,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -215,7 +215,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -236,7 +236,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1, _col6 input vertices: - 0 Map 2 + 0 Map 1 Position of Big Table: 1 Statistics: Num rows: 22 Data size: 2310 Basic stats: COMPLETE Column stats: NONE Select Operator diff --git a/ql/src/test/results/clientpositive/spark/bucketmapjoin_negative2.q.out b/ql/src/test/results/clientpositive/spark/bucketmapjoin_negative2.q.out index 58022bc..1bf1c41 100644 --- a/ql/src/test/results/clientpositive/spark/bucketmapjoin_negative2.q.out +++ b/ql/src/test/results/clientpositive/spark/bucketmapjoin_negative2.q.out @@ -145,7 +145,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -217,7 +217,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -238,7 +238,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1, _col6 input vertices: - 0 Map 2 + 0 Map 1 Position of Big Table: 1 Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE Select Operator diff --git a/ql/src/test/results/clientpositive/spark/bucketmapjoin_negative3.q.out b/ql/src/test/results/clientpositive/spark/bucketmapjoin_negative3.q.out index 422dff0..bfe5438 100644 --- a/ql/src/test/results/clientpositive/spark/bucketmapjoin_negative3.q.out +++ b/ql/src/test/results/clientpositive/spark/bucketmapjoin_negative3.q.out @@ -204,7 +204,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: r @@ -278,7 +278,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: l @@ -299,7 +299,7 @@ STAGE PLANS: 1 key (type: string), value (type: string) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 1 Map 1 + 1 Map 2 Position of Big Table: 0 Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -445,7 +445,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan 
alias: r @@ -519,7 +519,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: l @@ -540,7 +540,7 @@ STAGE PLANS: 1 key (type: string), value (type: string) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 1 Map 1 + 1 Map 2 Position of Big Table: 0 Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -683,7 +683,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: r @@ -757,7 +757,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: l @@ -778,7 +778,7 @@ STAGE PLANS: 1 UDFToDouble(key) (type: double) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 1 Map 1 + 1 Map 2 Position of Big Table: 0 Statistics: Num rows: 12 Data size: 2420 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -924,7 +924,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: r @@ -998,7 +998,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: l @@ -1019,7 +1019,7 @@ STAGE PLANS: 1 key (type: string), value (type: string) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 1 Map 1 + 1 Map 2 Position of Big Table: 0 Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -1165,7 +1165,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: r @@ -1239,7 +1239,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: l @@ -1260,7 +1260,7 @@ STAGE PLANS: 1 key (type: string), value (type: string) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 1 Map 1 + 1 Map 2 Position of Big Table: 0 Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -1406,7 +1406,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: r @@ -1480,7 +1480,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: l @@ -1501,7 +1501,7 @@ STAGE PLANS: 1 key (type: string), value (type: string) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 1 Map 1 + 1 Map 2 Position of Big Table: 0 Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -1647,7 +1647,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: r @@ -1721,7 +1721,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: l @@ -1742,7 +1742,7 @@ STAGE PLANS: 1 key (type: string), value (type: string) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 1 Map 1 + 1 Map 2 Position of Big Table: 0 Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -1888,7 +1888,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: r @@ -1962,7 +1962,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: l @@ -1983,7 +1983,7 @@ STAGE PLANS: 1 key 
(type: string), value (type: string) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 1 Map 1 + 1 Map 2 Position of Big Table: 0 Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -2129,7 +2129,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: r @@ -2203,7 +2203,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: l @@ -2224,7 +2224,7 @@ STAGE PLANS: 1 key (type: string), value (type: string) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 1 Map 1 + 1 Map 2 Position of Big Table: 0 Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE Select Operator diff --git a/ql/src/test/results/clientpositive/spark/column_access_stats.q.out b/ql/src/test/results/clientpositive/spark/column_access_stats.q.out index 704eb46..79c1249 100644 --- a/ql/src/test/results/clientpositive/spark/column_access_stats.q.out +++ b/ql/src/test/results/clientpositive/spark/column_access_stats.q.out @@ -381,7 +381,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: t2 @@ -403,7 +403,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: t1 @@ -422,7 +422,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: _col0 (type: string) @@ -512,7 +512,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: t2 @@ -534,7 +534,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: t1 @@ -553,7 +553,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col5 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: _col0 (type: string), '3' (type: string), _col5 (type: string), '3' (type: string) @@ -610,7 +610,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: t2 @@ -636,7 +636,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: t1 @@ -659,7 +659,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: _col0 (type: string) @@ -730,23 +730,6 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: t3 - Statistics: Num rows: 0 Data size: 35 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} - 1 {val} - keys: - 0 _col0 (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work Map 2 Map Operator Tree: TableScan @@ -768,12 +751,29 @@ STAGE PLANS: 1 _col0 (type: string) Local Work: Map Reduce Local Work + Map 3 + Map Operator Tree: + TableScan + alias: t3 + Statistics: Num rows: 0 Data size: 35 Basic stats: PARTIAL Column stats: NONE + 
Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} + 1 {val} + keys: + 0 _col0 (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work Stage: Stage-1 Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: t1 @@ -798,34 +798,30 @@ STAGE PLANS: input vertices: 1 Map 2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 {key} {val} + keys: + 0 _col0 (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} - 1 {key} {val} - keys: - 0 _col0 (type: string) - 1 key (type: string) + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 - input vertices: - 1 Map 1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work diff --git a/ql/src/test/results/clientpositive/spark/cross_join.q.out b/ql/src/test/results/clientpositive/spark/cross_join.q.out index d8942d9..38cc1e3 100644 --- a/ql/src/test/results/clientpositive/spark/cross_join.q.out +++ b/ql/src/test/results/clientpositive/spark/cross_join.q.out @@ -14,7 +14,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: src2 @@ -33,7 +33,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src @@ -49,7 +49,7 @@ STAGE PLANS: 1 outputColumnNames: _col0 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string) @@ -87,7 +87,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: src2 @@ -106,7 +106,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src @@ -122,7 +122,7 @@ STAGE PLANS: 1 outputColumnNames: _col0 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string) @@ -160,7 +160,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### 
Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: src2 @@ -182,7 +182,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src @@ -201,7 +201,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string) diff --git a/ql/src/test/results/clientpositive/spark/ctas.q.out b/ql/src/test/results/clientpositive/spark/ctas.q.out index 15c46ed..273c53b 100644 --- a/ql/src/test/results/clientpositive/spark/ctas.q.out +++ b/ql/src/test/results/clientpositive/spark/ctas.q.out @@ -1,10 +1,10 @@ -PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) create table nzhang_Tmp(a int, b string) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@nzhang_Tmp -POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) create table nzhang_Tmp(a int, b string) POSTHOOK: type: CREATETABLE diff --git a/ql/src/test/results/clientpositive/spark/groupby4.q.out b/ql/src/test/results/clientpositive/spark/groupby4.q.out index 20c6d0f..4ab1c49 100644 --- a/ql/src/test/results/clientpositive/spark/groupby4.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby4.q.out @@ -64,18 +64,14 @@ STAGE PLANS: mode: final outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + File Output Operator + compressed: false Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 Stage: Stage-0 Move Operator diff --git a/ql/src/test/results/clientpositive/spark/groupby7_noskew_multi_single_reducer.q.out b/ql/src/test/results/clientpositive/spark/groupby7_noskew_multi_single_reducer.q.out index b80a54c..abca641 100644 --- a/ql/src/test/results/clientpositive/spark/groupby7_noskew_multi_single_reducer.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby7_noskew_multi_single_reducer.q.out @@ -112,17 +112,13 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE 
Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: double) + value expressions: _col0 (type: string), _col1 (type: double) Reducer 6 Reduce Operator Tree: Forward @@ -133,17 +129,13 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: double) + value expressions: _col0 (type: string), _col1 (type: double) Stage: Stage-0 Move Operator diff --git a/ql/src/test/results/clientpositive/spark/groupby_complex_types.q.out b/ql/src/test/results/clientpositive/spark/groupby_complex_types.q.out index ee09332..8f2add2 100644 --- a/ql/src/test/results/clientpositive/spark/groupby_complex_types.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby_complex_types.q.out @@ -127,18 +127,14 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: array), _col1 (type: bigint) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 Reducer 3 Reduce Operator Tree: Group By Operator @@ -147,18 +143,14 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: map), _col1 (type: bigint) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 Reducer 4 Reduce Operator Tree: Group By Operator @@ -167,18 +159,14 @@ STAGE PLANS: mode: mergepartial outputColumnNames: 
_col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: struct), _col1 (type: bigint) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest3 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest3 Stage: Stage-0 Move Operator diff --git a/ql/src/test/results/clientpositive/spark/groupby_complex_types_multi_single_reducer.q.out b/ql/src/test/results/clientpositive/spark/groupby_complex_types_multi_single_reducer.q.out index e8f7e41..ec6e7dc 100644 --- a/ql/src/test/results/clientpositive/spark/groupby_complex_types_multi_single_reducer.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby_complex_types_multi_single_reducer.q.out @@ -95,17 +95,13 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: array), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: array), _col1 (type: bigint) + value expressions: _col0 (type: array), _col1 (type: bigint) Reducer 3 Reduce Operator Tree: Select Operator @@ -131,17 +127,13 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: map), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: map), _col1 (type: bigint) + value expressions: _col0 (type: map), _col1 (type: bigint) Reducer 5 Reduce Operator Tree: Select Operator diff --git a/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer2.q.out b/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer2.q.out index f016ca1..0782fcd 100644 --- a/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer2.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer2.q.out @@ 
-46,16 +46,12 @@ STAGE PLANS: Filter Operator predicate: ((substr(key, 1, 1) >= 5) or (substr(key, 1, 1) < 5)) (type: boolean) Statistics: Num rows: 332 Data size: 3527 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Reduce Output Operator + key expressions: substr(key, 1, 1) (type: string), key (type: string) + sort order: ++ + Map-reduce partition columns: substr(key, 1, 1) (type: string) Statistics: Num rows: 332 Data size: 3527 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: substr(key, 1, 1) (type: string), key (type: string) - sort order: ++ - Map-reduce partition columns: substr(key, 1, 1) (type: string) - Statistics: Num rows: 332 Data size: 3527 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + value expressions: value (type: string) Reducer 2 Reduce Operator Tree: Forward diff --git a/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer3.q.out b/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer3.q.out index fc02ea3..d32a360 100644 --- a/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer3.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer3.q.out @@ -60,16 +60,12 @@ STAGE PLANS: Filter Operator predicate: (((value) IN ('val_100', 'val_200', 'val_300') and (key) IN (100, 150, 200)) or ((value) IN ('val_400', 'val_500') and (key) IN (400, 450))) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + value expressions: value (type: string) Reducer 2 Reduce Operator Tree: Forward @@ -236,16 +232,12 @@ STAGE PLANS: Filter Operator predicate: (((((key + key) = 200) or ((key - 100) = 100)) or ((key = 300) and value is not null)) or (((key + key) = 400) or (((key - 100) = 500) and value is not null))) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: string) + value expressions: key (type: string) Reducer 2 Reduce Operator Tree: Forward @@ -412,16 +404,12 @@ STAGE PLANS: Filter Operator predicate: (((value) IN ('val_100', 'val_200', 'val_300') and (key) IN (100, 150, 200)) or ((value) IN ('val_400', 'val_500') and (key) IN (400, 450))) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + value expressions: value (type: string) Reducer 2 Reduce Operator Tree: Forward @@ -588,16 +576,12 @@ STAGE PLANS: Filter Operator predicate: (((((key + key) = 200) or ((key - 100) = 100)) or ((key = 300) and value is not null)) or (((key + key) = 400) or (((key - 100) = 500) and value is not null))) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: string) + value expressions: key (type: string) Reducer 2 Reduce Operator Tree: Forward diff --git a/ql/src/test/results/clientpositive/spark/groupby_position.q.out b/ql/src/test/results/clientpositive/spark/groupby_position.q.out index 6367c43..46a1b25 100644 --- a/ql/src/test/results/clientpositive/spark/groupby_position.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby_position.q.out @@ -51,21 +51,17 @@ STAGE PLANS: Filter Operator predicate: (key < 20) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Group By Operator + aggregations: count(DISTINCT substr(value, 5)) + keys: key (type: string), substr(value, 5) (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(DISTINCT substr(value, 5)) - keys: key (type: string), substr(value, 5) (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Map 5 Map Operator Tree: TableScan @@ -74,21 +70,17 @@ STAGE PLANS: Filter Operator predicate: (key < 20) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Group By Operator + aggregations: count(DISTINCT 
substr(value, 5)) + keys: key (type: string), value (type: string), substr(value, 5) (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(DISTINCT substr(value, 5)) - keys: key (type: string), value (type: string), substr(value, 5) (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Group By Operator @@ -250,21 +242,17 @@ STAGE PLANS: Filter Operator predicate: (key < 20) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Group By Operator + aggregations: count(DISTINCT substr(value, 5)) + keys: key (type: string), substr(value, 5) (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(DISTINCT substr(value, 5)) - keys: key (type: string), substr(value, 5) (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Map 5 Map Operator Tree: TableScan @@ -450,22 +438,18 @@ STAGE PLANS: Filter Operator predicate: (key <= 20) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Group By Operator + aggregations: count(1) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator @@ -473,24 +457,20 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial 
outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: bigint), _col0 (type: string) - sort order: -+ - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: bigint), _col0 (type: string) + sort order: -+ + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -569,39 +549,37 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 5 <- Map 4 (GROUP, 3) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: - Map 4 + Map 1 Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((key > 15) and (key < 25)) and key is not null) (type: boolean) + predicate: (((key > 10) and (key < 20)) and key is not null) (type: boolean) Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Group By Operator + aggregations: count(DISTINCT substr(value, 5)) + keys: key (type: string), value (type: string), substr(value, 5) (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: key (type: string), value (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE - Reducer 5 + Reducer 2 Local Work: Map Reduce Local Work Reduce Operator Tree: Group By Operator + aggregations: count(DISTINCT KEY._col2:0._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: 
string) @@ -609,8 +587,8 @@ STAGE PLANS: Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {_col0} {_col1} - 1 {_col1} + 0 {_col1} + 1 {_col0} {_col1} keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -618,77 +596,67 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 3) - Reducer 3 <- Reducer 2 (SORT, 1) + Reducer 4 <- Map 3 (GROUP, 3) + Reducer 5 <- Reducer 4 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan - alias: src1 + alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((key > 10) and (key < 20)) and key is not null) (type: boolean) + predicate: (((key > 15) and (key < 25)) and key is not null) (type: boolean) Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Group By Operator + keys: key (type: string), value (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(DISTINCT substr(value, 5)) - keys: key (type: string), value (type: string), substr(value, 5) (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Reducer 4 Local Work: Map Reduce Local Work Reduce Operator Tree: Group By Operator - aggregations: count(DISTINCT KEY._col2:0._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} - 1 {_col0} {_col1} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3, _col4 - input vertices: - 1 Reducer 5 - Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - sort order: --++ - Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE - Reducer 3 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition 
map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {_col0} {_col1} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3, _col4 + input vertices: + 0 Reducer 2 + Statistics: Num rows: 30 Data size: 326 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 30 Data size: 326 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + sort order: --++ + Statistics: Num rows: 30 Data size: 326 Basic stats: COMPLETE Column stats: NONE + Reducer 5 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 326 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 326 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git a/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out b/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out index 98bb89e..3ea2bea 100644 --- a/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) +PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) -- SORT_QUERY_RESULTS CREATE TABLE T1(key STRING, val STRING) @@ -6,7 +6,7 @@ CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@T1 -POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) +POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) -- SORT_QUERY_RESULTS CREATE TABLE T1(key STRING, val STRING) @@ -1693,24 +1693,20 @@ STAGE PLANS: mode: final outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) + Group By Operator + aggregations: sum(_col1) + keys: (_col0 + _col0) (type: double) + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col1) - keys: (_col0 + _col0) (type: double) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col1 (type: 
bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1970,41 +1966,38 @@ STAGE PLANS: mode: final outputColumnNames: _col0, _col1 Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) + expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 - Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 1 + File Output Operator + compressed: false + GlobalTableId: 1 #### A masked pattern was here #### - NumFilesPerFileSink: 1 + NumFilesPerFileSink: 1 #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,cnt + columns.comments + columns.types int:int #### A masked pattern was here #### - name default.outputtbl1 - numFiles 3 - numRows 5 - rawDataSize 17 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 22 + name default.outputtbl1 + numFiles 3 + numRows 5 + rawDataSize 17 + serialization.ddl struct outputtbl1 { i32 key, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 22 #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2072,41 +2065,38 @@ STAGE PLANS: mode: final outputColumnNames: _col0, _col1 Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) + expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 - Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 1 + File Output Operator + compressed: false + GlobalTableId: 1 #### A masked pattern was here #### - NumFilesPerFileSink: 1 + NumFilesPerFileSink: 1 #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,cnt + columns.comments + columns.types int:int #### A masked pattern was here #### - name default.outputtbl1 - numFiles 3 - numRows 5 - rawDataSize 17 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - 
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 22 + name default.outputtbl1 + numFiles 3 + numRows 5 + rawDataSize 17 + serialization.ddl struct outputtbl1 { i32 key, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 22 #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2515,41 +2505,38 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Select Operator - expressions: _col0 (type: double), _col1 (type: bigint) + expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 - Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 2 - numRows 10 - rawDataSize 30 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 40 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,cnt + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.outputtbl1 + numFiles 2 + numRows 10 + rawDataSize 30 + serialization.ddl struct outputtbl1 { i32 key, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 40 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Union 2 Vertex: Union 2 @@ -2744,28 +2731,20 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Group By Operator + aggregations: count(1) + keys: key (type: string) + mode: final + outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: 
COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} - 1 {_col1} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Position of Big Table: 0 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} + 1 {_col1} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Position of Big Table: 0 Local Work: Map Reduce Local Work Path -> Alias: @@ -2836,69 +2815,61 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Group By Operator + aggregations: count(1) + keys: key (type: string) + mode: final + outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {_col1} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 1 Map 2 + Position of Big Table: 0 + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) + expressions: UDFToInteger(_col0) (type: int), UDFToInteger((_col1 + _col3)) (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} - 1 {_col1} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 1 Map 2 - Position of Big Table: 0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger((_col1 + _col3)) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 4 - numRows 10 - rawDataSize 32 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 42 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false 
+ GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,cnt + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.outputtbl1 + numFiles 4 + numRows 10 + rawDataSize 32 + serialization.ddl struct outputtbl1 { i32 key, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 42 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -3129,24 +3100,20 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), val (type: string) - outputColumnNames: key, val + Group By Operator + aggregations: count(1) + keys: key (type: string), val (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string), val (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col2 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col2 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3210,19 +3177,15 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} - 1 {_col1} {_col2} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Position of Big Table: 0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} + 1 {_col1} {_col2} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Position of Big Table: 0 Stage: Stage-1 Spark @@ -3238,59 +3201,51 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Group By 
Operator + aggregations: count(1) + keys: key (type: string) + mode: final + outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {_col0} {_col1} {_col2} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + input vertices: + 1 Reducer 3 + Position of Big Table: 0 + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} - 1 {_col0} {_col1} {_col2} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - input vertices: - 1 Reducer 3 - Position of Big Table: 0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string), _col4 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4 - columns.types string:bigint:string:string:bigint - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string), _col4 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4 + columns.types string:bigint:string:string:bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: diff --git a/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out 
b/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out index 59db1f1..69999f5 100644 --- a/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) +PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) -- SORT_QUERY_RESULTS CREATE TABLE T1(key STRING, val STRING) @@ -6,7 +6,7 @@ CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@T1 -POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) +POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) -- SORT_QUERY_RESULTS CREATE TABLE T1(key STRING, val STRING) @@ -1748,24 +1748,20 @@ STAGE PLANS: mode: final outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) + Group By Operator + aggregations: sum(_col1) + keys: (_col0 + _col0) (type: double) + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col1) - keys: (_col0 + _col0) (type: double) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col1 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2042,41 +2038,38 @@ STAGE PLANS: mode: final outputColumnNames: _col0, _col1 Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) + expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 - Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 1 + File Output Operator + compressed: false + GlobalTableId: 1 #### A masked pattern was here #### - NumFilesPerFileSink: 1 + NumFilesPerFileSink: 1 #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,cnt + columns.comments + columns.types int:int #### A masked pattern was here #### - name default.outputtbl1 - numFiles 3 - numRows 5 - rawDataSize 17 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 22 + name default.outputtbl1 + numFiles 3 + numRows 5 + rawDataSize 
17 + serialization.ddl struct outputtbl1 { i32 key, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 22 #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2144,41 +2137,38 @@ STAGE PLANS: mode: final outputColumnNames: _col0, _col1 Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) + expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 - Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 1 + File Output Operator + compressed: false + GlobalTableId: 1 #### A masked pattern was here #### - NumFilesPerFileSink: 1 + NumFilesPerFileSink: 1 #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,cnt + columns.comments + columns.types int:int #### A masked pattern was here #### - name default.outputtbl1 - numFiles 3 - numRows 5 - rawDataSize 17 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 22 + name default.outputtbl1 + numFiles 3 + numRows 5 + rawDataSize 17 + serialization.ddl struct outputtbl1 { i32 key, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 22 #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2605,41 +2595,38 @@ STAGE PLANS: mode: final outputColumnNames: _col0, _col1 Select Operator - expressions: _col0 (type: double), _col1 (type: bigint) + expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 - Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 2 
- numRows 10 - rawDataSize 30 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 40 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,cnt + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.outputtbl1 + numFiles 2 + numRows 10 + rawDataSize 30 + serialization.ddl struct outputtbl1 { i32 key, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 40 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Union 2 Vertex: Union 2 @@ -2834,28 +2821,20 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Group By Operator + aggregations: count(1) + keys: key (type: string) + mode: final + outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} - 1 {_col1} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Position of Big Table: 0 + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} + 1 {_col1} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Position of Big Table: 0 Local Work: Map Reduce Local Work Path -> Alias: @@ -2926,69 +2905,61 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Group By Operator + aggregations: count(1) + keys: key (type: string) + mode: final + outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {_col1} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 1 Map 2 + Position of Big Table: 0 + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE Select 
Operator - expressions: _col0 (type: string), _col1 (type: bigint) + expressions: UDFToInteger(_col0) (type: int), UDFToInteger((_col1 + _col3)) (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} - 1 {_col1} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 1 Map 2 - Position of Big Table: 0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger((_col1 + _col3)) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 4 - numRows 10 - rawDataSize 32 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 42 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,cnt + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.outputtbl1 + numFiles 4 + numRows 10 + rawDataSize 32 + serialization.ddl struct outputtbl1 { i32 key, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 42 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -3220,24 +3191,20 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), val (type: string) - outputColumnNames: key, val + Group By Operator + aggregations: count(1) + keys: key (type: string), val (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string), val (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 + 
Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col2 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col2 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3318,19 +3285,15 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: final outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} - 1 {_col1} {_col2} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Position of Big Table: 0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} + 1 {_col1} {_col2} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Position of Big Table: 0 Stage: Stage-1 Spark @@ -3346,59 +3309,51 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Group By Operator + aggregations: count(1) + keys: key (type: string) + mode: final + outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {_col0} {_col1} {_col2} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + input vertices: + 1 Reducer 4 + Position of Big Table: 0 + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} - 1 {_col0} {_col1} {_col2} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - input vertices: - 1 Reducer 4 - Position of Big Table: 0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string), _col4 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - 
GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4 - columns.types string:bigint:string:string:bigint - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string), _col4 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4 + columns.types string:bigint:string:string:bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: diff --git a/ql/src/test/results/clientpositive/spark/having.q.out b/ql/src/test/results/clientpositive/spark/having.q.out index 3b80bd1..2659275 100644 --- a/ql/src/test/results/clientpositive/spark/having.q.out +++ b/ql/src/test/results/clientpositive/spark/having.q.out @@ -106,22 +106,18 @@ STAGE PLANS: Filter Operator predicate: (key <> 302) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Group By Operator + aggregations: max(value) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: max(value) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: Group By Operator @@ -759,22 +755,18 @@ STAGE PLANS: Filter Operator predicate: (key > 300) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Group By Operator + 
aggregations: max(value) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: max(value) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/spark/index_auto_self_join.q.out b/ql/src/test/results/clientpositive/spark/index_auto_self_join.q.out index 44b6e8b..a44177c 100644 --- a/ql/src/test/results/clientpositive/spark/index_auto_self_join.q.out +++ b/ql/src/test/results/clientpositive/spark/index_auto_self_join.q.out @@ -18,7 +18,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -40,7 +40,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -59,7 +59,7 @@ STAGE PLANS: 1 value (type: string) outputColumnNames: _col0, _col5 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 29 Data size: 314 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col5 (type: string) @@ -134,7 +134,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -157,7 +157,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -177,7 +177,7 @@ STAGE PLANS: 1 value (type: string) outputColumnNames: _col0, _col5 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 29 Data size: 314 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col5 (type: string) diff --git a/ql/src/test/results/clientpositive/spark/infer_bucket_sort_convert_join.q.out b/ql/src/test/results/clientpositive/spark/infer_bucket_sort_convert_join.q.out index 53e0a7a..3d4eb18 100644 --- a/ql/src/test/results/clientpositive/spark/infer_bucket_sort_convert_join.q.out +++ b/ql/src/test/results/clientpositive/spark/infer_bucket_sort_convert_join.q.out @@ -77,6 +77,8 @@ SELECT a.key, b.value FROM src a JOIN src b ON a.key = b.key PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 +Status: Failed +Status: Failed POSTHOOK: query: -- This test tests the scenario when the mapper dies. So, create a conditional task for the mapjoin. -- Tests a join which is not converted to a map join, the output should be bucketed and sorted. 
diff --git a/ql/src/test/results/clientpositive/spark/innerjoin.q.out b/ql/src/test/results/clientpositive/spark/innerjoin.q.out index 3b752b7..7d1b424 100644 --- a/ql/src/test/results/clientpositive/spark/innerjoin.q.out +++ b/ql/src/test/results/clientpositive/spark/innerjoin.q.out @@ -29,7 +29,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: src2 @@ -51,7 +51,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src1 @@ -70,7 +70,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col6 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), _col6 (type: string) @@ -1198,7 +1198,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: src @@ -1217,7 +1217,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src @@ -1237,7 +1237,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col1, _col2, _col3 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) diff --git a/ql/src/test/results/clientpositive/spark/input12.q.out b/ql/src/test/results/clientpositive/spark/input12.q.out index a32a928..4317186 100644 --- a/ql/src/test/results/clientpositive/spark/input12.q.out +++ b/ql/src/test/results/clientpositive/spark/input12.q.out @@ -1,10 +1,10 @@ -PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@dest1 -POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE diff --git a/ql/src/test/results/clientpositive/spark/join0.q.out b/ql/src/test/results/clientpositive/spark/join0.q.out index 3ed5d2d..aa8e811 100644 --- a/ql/src/test/results/clientpositive/spark/join0.q.out +++ b/ql/src/test/results/clientpositive/spark/join0.q.out @@ -24,7 +24,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: src @@ -49,10 +49,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (SORT, 1) + Reducer 2 <- Map 1 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src @@ -75,19 +75,15 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3 input vertices: - 1 Map 1 + 1 Map 3 Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + sort order: ++++ Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), 
_col1 (type: string), _col2 (type: string), _col3 (type: string) - sort order: ++++ - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Reducer 3 + Reducer 2 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string) diff --git a/ql/src/test/results/clientpositive/spark/join1.q.out b/ql/src/test/results/clientpositive/spark/join1.q.out index 0af1327..3473f11 100644 --- a/ql/src/test/results/clientpositive/spark/join1.q.out +++ b/ql/src/test/results/clientpositive/spark/join1.q.out @@ -29,7 +29,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: src2 @@ -51,7 +51,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src1 @@ -70,7 +70,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col6 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), _col6 (type: string) diff --git a/ql/src/test/results/clientpositive/spark/join11.q.out b/ql/src/test/results/clientpositive/spark/join11.q.out index aad7415..2124047 100644 --- a/ql/src/test/results/clientpositive/spark/join11.q.out +++ b/ql/src/test/results/clientpositive/spark/join11.q.out @@ -28,7 +28,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: src @@ -54,7 +54,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src @@ -77,7 +77,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col3 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col3 (type: string) diff --git a/ql/src/test/results/clientpositive/spark/join12.q.out b/ql/src/test/results/clientpositive/spark/join12.q.out index df0e03b..98c71c2 100644 --- a/ql/src/test/results/clientpositive/spark/join12.q.out +++ b/ql/src/test/results/clientpositive/spark/join12.q.out @@ -34,7 +34,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: src @@ -85,7 +85,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src @@ -111,7 +111,7 @@ STAGE PLANS: 2 _col0 (type: string) outputColumnNames: _col0, _col3 input vertices: - 1 Map 1 + 1 Map 2 2 Map 3 Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE Select Operator diff --git a/ql/src/test/results/clientpositive/spark/join13.q.out b/ql/src/test/results/clientpositive/spark/join13.q.out index 3079e92..3896c0e 100644 --- a/ql/src/test/results/clientpositive/spark/join13.q.out +++ b/ql/src/test/results/clientpositive/spark/join13.q.out @@ -34,7 +34,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: src @@ -81,7 +81,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src @@ -104,7 +104,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col2, _col3 input vertices: - 1 Map 1 + 1 Map 2 
Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col0 + _col2) is not null (type: boolean) diff --git a/ql/src/test/results/clientpositive/spark/join14.q.out b/ql/src/test/results/clientpositive/spark/join14.q.out index 4f4988d..28f2796 100644 --- a/ql/src/test/results/clientpositive/spark/join14.q.out +++ b/ql/src/test/results/clientpositive/spark/join14.q.out @@ -1,12 +1,12 @@ -PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) --- SORT_QUERY_RESULTS +PREHOOK: query: -- SORT_QUERY_RESULTS +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@dest1 -POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) --- SORT_QUERY_RESULTS +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE @@ -31,7 +31,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src @@ -53,7 +53,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: srcpart @@ -72,7 +72,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col6 input vertices: - 0 Map 2 + 0 Map 1 Statistics: Num rows: 183 Data size: 1951 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), _col6 (type: string) diff --git a/ql/src/test/results/clientpositive/spark/join15.q.out b/ql/src/test/results/clientpositive/spark/join15.q.out index 60c23d2..b4638cd 100644 --- a/ql/src/test/results/clientpositive/spark/join15.q.out +++ b/ql/src/test/results/clientpositive/spark/join15.q.out @@ -14,7 +14,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: src2 @@ -35,10 +35,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (SORT, 1) + Reducer 2 <- Map 1 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src1 @@ -57,7 +57,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 1 Map 1 + 1 Map 3 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) @@ -69,7 +69,7 @@ STAGE PLANS: Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Reducer 3 + Reducer 2 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string) diff --git a/ql/src/test/results/clientpositive/spark/join17.q.out b/ql/src/test/results/clientpositive/spark/join17.q.out index 768b03e..a89ddeb 100644 --- a/ql/src/test/results/clientpositive/spark/join17.q.out +++ b/ql/src/test/results/clientpositive/spark/join17.q.out @@ -67,7 +67,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: src2 @@ -141,7 +141,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src1 @@ -162,7 +162,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, 
_col1, _col5, _col6 input vertices: - 1 Map 1 + 1 Map 2 Position of Big Table: 0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator diff --git a/ql/src/test/results/clientpositive/spark/join18.q.out b/ql/src/test/results/clientpositive/spark/join18.q.out index 0196d3b..8d8f263 100644 --- a/ql/src/test/results/clientpositive/spark/join18.q.out +++ b/ql/src/test/results/clientpositive/spark/join18.q.out @@ -32,34 +32,14 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 5 <- Map 4 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) + Reducer 5 <- Map 4 (GROUP PARTITION-LEVEL SORT, 1) Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: src2 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(DISTINCT value) - keys: key (type: string), value (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Map 4 - Map Operator Tree: - TableScan alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -78,24 +58,40 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) + Map 4 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT value) + keys: key (type: string), value (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Group By Operator - aggregations: count(DISTINCT KEY._col1:0._col0) + aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + Statistics: 
Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reducer 3 Reduce Operator Tree: Join Operator @@ -105,14 +101,14 @@ STAGE PLANS: 0 {KEY.reducesinkkey0} {VALUE._col0} 1 {KEY.reducesinkkey0} {VALUE._col0} outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -120,21 +116,17 @@ STAGE PLANS: Reducer 5 Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) + aggregations: count(DISTINCT KEY._col1:0._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/join18_multi_distinct.q.out b/ql/src/test/results/clientpositive/spark/join18_multi_distinct.q.out index c0d3fdc..f7fbf4e 100644 --- a/ql/src/test/results/clientpositive/spark/join18_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/spark/join18_multi_distinct.q.out @@ -38,34 +38,14 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 5 <- Map 4 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) + Reducer 5 <- Map 4 (GROUP PARTITION-LEVEL SORT, 1) Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: src2 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column 
stats: NONE - Group By Operator - aggregations: count(DISTINCT value), count(DISTINCT key) - keys: key (type: string), value (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Map 4 - Map Operator Tree: - TableScan alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -84,24 +64,40 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) + Map 4 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT value), count(DISTINCT key) + keys: key (type: string), value (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Group By Operator - aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0) + aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint), _col2 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reducer 3 Reduce Operator Tree: Join Operator @@ -111,14 +107,14 @@ STAGE PLANS: 0 {KEY.reducesinkkey0} {VALUE._col0} 1 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint), _col4 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 
275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -126,21 +122,17 @@ STAGE PLANS: Reducer 5 Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) + aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/join19.q.out b/ql/src/test/results/clientpositive/spark/join19.q.out index b5a32ac..b54f575 100644 --- a/ql/src/test/results/clientpositive/spark/join19.q.out +++ b/ql/src/test/results/clientpositive/spark/join19.q.out @@ -132,10 +132,10 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: t6 + alias: t2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator predicate: ((predicate = 'http://sofa.semanticweb.org/sofa/v1.0/system#__LABEL_REL') and subject is not null) (type: boolean) @@ -146,20 +146,22 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {_col0} {_col2} {_col3} {_col7} + 0 {_col0} 1 {_col1} + 2 {_col0} keys: - 0 _col7 (type: string) + 0 _col0 (type: string) 1 _col0 (type: string) + 2 _col1 (type: string) Local Work: Map Reduce Local Work - Map 2 + Map 3 Map Operator Tree: TableScan - alias: t2 + alias: t3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: ((predicate = 'http://sofa.semanticweb.org/sofa/v1.0/system#__LABEL_REL') and subject is not null) (type: boolean) + predicate: (((predicate = 'http://www.ontosearch.com/2007/12/ontosofa-ns#_from') and object is not null) and subject is not null) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: subject (type: string), object (type: string) @@ -176,30 +178,30 @@ STAGE PLANS: 2 _col1 (type: string) Local Work: Map Reduce Local Work - Map 3 + Map 4 Map Operator Tree: TableScan - alias: t3 + alias: t4 Statistics: Num rows: 0 Data size: 0 
Basic stats: NONE Column stats: NONE Filter Operator - predicate: (((predicate = 'http://www.ontosearch.com/2007/12/ontosofa-ns#_from') and object is not null) and subject is not null) (type: boolean) + predicate: (((predicate = 'http://sofa.semanticweb.org/sofa/v1.0/system#__INSTANCEOF_REL') and (object = 'http://ontos/OntosMiner/Common.English/ontology#Author')) and subject is not null) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: subject (type: string), object (type: string) - outputColumnNames: _col0, _col1 + expressions: subject (type: string) + outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {_col0} - 1 {_col1} - 2 {_col0} + 0 {_col0} {_col2} {_col3} + 1 + 2 {_col1} keys: - 0 _col0 (type: string) + 0 _col3 (type: string) 1 _col0 (type: string) - 2 _col1 (type: string) + 2 _col0 (type: string) Local Work: Map Reduce Local Work - Map 4 + Map 5 Map Operator Tree: TableScan alias: t5 @@ -225,24 +227,22 @@ STAGE PLANS: Map 6 Map Operator Tree: TableScan - alias: t4 + alias: t6 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: (((predicate = 'http://sofa.semanticweb.org/sofa/v1.0/system#__INSTANCEOF_REL') and (object = 'http://ontos/OntosMiner/Common.English/ontology#Author')) and subject is not null) (type: boolean) + predicate: ((predicate = 'http://sofa.semanticweb.org/sofa/v1.0/system#__LABEL_REL') and subject is not null) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: subject (type: string) - outputColumnNames: _col0 + expressions: subject (type: string), object (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {_col0} {_col2} {_col3} - 1 - 2 {_col1} + 0 {_col0} {_col2} {_col3} {_col7} + 1 {_col1} keys: - 0 _col3 (type: string) + 0 _col7 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) Local Work: Map Reduce Local Work @@ -250,7 +250,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 5 + Map 1 Map Operator Tree: TableScan alias: t1 @@ -293,8 +293,8 @@ STAGE PLANS: 2 _col0 (type: string) outputColumnNames: _col0, _col2, _col3, _col7 input vertices: - 1 Map 6 - 2 Map 4 + 1 Map 4 + 2 Map 5 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Map Join Operator condition map: @@ -307,7 +307,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col2, _col3, _col7, _col9 input vertices: - 1 Map 1 + 1 Map 6 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: _col0 (type: string), _col2 (type: string), _col3 (type: string), _col7 (type: string), _col9 (type: string) diff --git a/ql/src/test/results/clientpositive/spark/join2.q.out b/ql/src/test/results/clientpositive/spark/join2.q.out index eee23ae..40c54cd 100644 --- a/ql/src/test/results/clientpositive/spark/join2.q.out +++ b/ql/src/test/results/clientpositive/spark/join2.q.out @@ -30,7 +30,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: src2 @@ -52,7 +52,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: src1 @@ -71,7 +71,7 @@ STAGE PLANS: 1 
key (type: string) outputColumnNames: _col0, _col5 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col0 + _col5) is not null (type: boolean) @@ -90,7 +90,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: src3 @@ -109,7 +109,7 @@ STAGE PLANS: 1 UDFToDouble(key) (type: double) outputColumnNames: _col0, _col11 input vertices: - 0 Map 3 + 0 Map 1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), _col11 (type: string) diff --git a/ql/src/test/results/clientpositive/spark/join20.q.out b/ql/src/test/results/clientpositive/spark/join20.q.out index d7c2c7c..7d4feb7 100644 --- a/ql/src/test/results/clientpositive/spark/join20.q.out +++ b/ql/src/test/results/clientpositive/spark/join20.q.out @@ -19,15 +19,15 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key < 10) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {key} {value} - 1 {value} + 0 {value} + 1 {key} {value} 2 {key} {value} filter predicates: 0 @@ -39,18 +39,18 @@ STAGE PLANS: 2 key (type: string) Local Work: Map Reduce Local Work - Map 4 + Map 2 Map Operator Tree: TableScan - alias: src1 + alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key < 10) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {value} - 1 {key} {value} + 0 {key} {value} + 1 {value} 2 {key} {value} filter predicates: 0 @@ -66,10 +66,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (SORT, 1) + Reducer 4 <- Map 3 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: src3 @@ -92,8 +92,8 @@ STAGE PLANS: 2 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: - 0 Map 4 - 1 Map 1 + 0 Map 1 + 1 Map 2 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) @@ -105,7 +105,7 @@ STAGE PLANS: Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Reducer 3 + Reducer 4 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) @@ -704,15 +704,15 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key < 10) and (key < 15)) (type: boolean) + predicate: ((key < 15) and (key < 10)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {key} {value} - 1 {value} + 0 {value} + 1 {key} {value} 2 {key} {value} 
filter predicates: 0 @@ -724,18 +724,18 @@ STAGE PLANS: 2 key (type: string) Local Work: Map Reduce Local Work - Map 4 + Map 2 Map Operator Tree: TableScan - alias: src1 + alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key < 15) and (key < 10)) (type: boolean) + predicate: ((key < 10) and (key < 15)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {value} - 1 {key} {value} + 0 {key} {value} + 1 {value} 2 {key} {value} filter predicates: 0 @@ -751,10 +751,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (SORT, 3) + Reducer 4 <- Map 3 (SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: src3 @@ -777,8 +777,8 @@ STAGE PLANS: 2 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: - 0 Map 4 - 1 Map 1 + 0 Map 1 + 1 Map 2 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) @@ -790,7 +790,7 @@ STAGE PLANS: Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Reducer 3 + Reducer 4 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) diff --git a/ql/src/test/results/clientpositive/spark/join21.q.out b/ql/src/test/results/clientpositive/spark/join21.q.out index 903d930..7601371 100644 --- a/ql/src/test/results/clientpositive/spark/join21.q.out +++ b/ql/src/test/results/clientpositive/spark/join21.q.out @@ -17,6 +17,26 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + 2 {key} {value} + filter predicates: + 0 {(key < 10)} + 1 + 2 {(key < 10)} + keys: + 0 key (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work + Map 2 + Map Operator Tree: + TableScan alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -37,34 +57,14 @@ STAGE PLANS: 2 key (type: string) Local Work: Map Reduce Local Work - Map 4 - Map Operator Tree: - TableScan - alias: src1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {key} {value} - 2 {key} {value} - filter predicates: - 0 {(key < 10)} - 1 - 2 {(key < 10)} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - Local Work: - Map Reduce Local Work Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (SORT, 1) + Reducer 4 <- Map 3 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: src3 @@ -87,8 +87,8 @@ STAGE PLANS: 2 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: - 0 Map 4 - 1 Map 1 + 0 Map 1 + 1 Map 2 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Select Operator 
expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) @@ -100,7 +100,7 @@ STAGE PLANS: Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Reducer 3 + Reducer 4 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) diff --git a/ql/src/test/results/clientpositive/spark/join22.q.out b/ql/src/test/results/clientpositive/spark/join22.q.out index 90c653e..2c96cef 100644 --- a/ql/src/test/results/clientpositive/spark/join22.q.out +++ b/ql/src/test/results/clientpositive/spark/join22.q.out @@ -14,7 +14,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: src2 @@ -53,7 +53,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src1 @@ -72,7 +72,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col1 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string) diff --git a/ql/src/test/results/clientpositive/spark/join23.q.out b/ql/src/test/results/clientpositive/spark/join23.q.out index bb7a324..465288c 100644 --- a/ql/src/test/results/clientpositive/spark/join23.q.out +++ b/ql/src/test/results/clientpositive/spark/join23.q.out @@ -14,7 +14,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: src2 @@ -35,10 +35,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (SORT, 1) + Reducer 2 <- Map 1 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src1 @@ -57,7 +57,7 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 1 Map 1 + 1 Map 3 Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) @@ -69,7 +69,7 @@ STAGE PLANS: Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Reducer 3 + Reducer 2 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string) diff --git a/ql/src/test/results/clientpositive/spark/join25.q.out b/ql/src/test/results/clientpositive/spark/join25.q.out index 36d8742..84e496a 100644 --- a/ql/src/test/results/clientpositive/spark/join25.q.out +++ b/ql/src/test/results/clientpositive/spark/join25.q.out @@ -31,7 +31,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: x @@ -53,7 +53,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: y @@ -72,7 +72,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col1, _col6 input vertices: - 0 Map 2 + 0 Map 1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), _col1 
(type: string), _col6 (type: string) diff --git a/ql/src/test/results/clientpositive/spark/join26.q.out b/ql/src/test/results/clientpositive/spark/join26.q.out index 2da4551..83f45f8 100644 --- a/ql/src/test/results/clientpositive/spark/join26.q.out +++ b/ql/src/test/results/clientpositive/spark/join26.q.out @@ -115,16 +115,16 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: z - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {key} + 0 1 {value} 2 {value} keys: @@ -139,12 +139,9 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: src1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -152,52 +149,53 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src1 numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src1 + numFiles 1 + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src1 + name: default.src1 Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [z] + /src1 [x] Map 3 Map Operator Tree: TableScan - alias: x - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: z + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 + 0 {key} 1 {value} 2 {value} keys: @@ -212,9 +210,12 @@ STAGE PLANS: Path -> Partition: #### A masked 
pattern was here #### Partition - base file name: src1 + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -222,40 +223,39 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.srcpart numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src1 - numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src1 - name: default.src1 + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: - /src1 [x] + /srcpart/ds=2008-04-08/hr=11 [z] Stage: Stage-1 Spark @@ -285,8 +285,8 @@ STAGE PLANS: 2 key (type: string) outputColumnNames: _col0, _col6, _col11 input vertices: - 0 Map 3 - 2 Map 1 + 0 Map 1 + 2 Map 3 Position of Big Table: 1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator diff --git a/ql/src/test/results/clientpositive/spark/join27.q.out b/ql/src/test/results/clientpositive/spark/join27.q.out index dcea98f..a578279 100644 --- a/ql/src/test/results/clientpositive/spark/join27.q.out +++ b/ql/src/test/results/clientpositive/spark/join27.q.out @@ -31,7 +31,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: x @@ -53,7 +53,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: y @@ -72,7 +72,7 @@ STAGE PLANS: 1 value (type: string) outputColumnNames: _col0, _col1, _col6 input vertices: - 0 Map 2 + 0 Map 1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), _col6 (type: string) diff --git a/ql/src/test/results/clientpositive/spark/join28.q.out b/ql/src/test/results/clientpositive/spark/join28.q.out index 45d437a..1136cb2 100644 --- a/ql/src/test/results/clientpositive/spark/join28.q.out +++ b/ql/src/test/results/clientpositive/spark/join28.q.out @@ -101,35 +101,31 @@ STAGE PLANS: input vertices: 0 Map 1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: 
_col0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} - 1 {value} - keys: - 0 _col0 (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col5 - input vertices: - 1 Map 3 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 {value} + keys: + 0 _col0 (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col5 + input vertices: + 1 Map 3 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col5 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col5 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 Local Work: Map Reduce Local Work diff --git a/ql/src/test/results/clientpositive/spark/join29.q.out b/ql/src/test/results/clientpositive/spark/join29.q.out index 87300df..738e302 100644 --- a/ql/src/test/results/clientpositive/spark/join29.q.out +++ b/ql/src/test/results/clientpositive/spark/join29.q.out @@ -36,10 +36,10 @@ STAGE PLANS: Stage: Stage-3 Spark Edges: - Reducer 4 <- Map 3 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: x @@ -47,23 +47,19 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Group By Operator + aggregations: count(1) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Reducer 4 + value expressions: _col1 (type: bigint) + Reducer 2 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -72,26 +68,22 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: 
_col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col1} - 1 {_col1} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col1} + 1 {_col1} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 4 <- Map 3 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: y @@ -99,23 +91,19 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Group By Operator + aggregations: count(1) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Reducer 2 + value expressions: _col1 (type: bigint) + Reducer 4 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -124,36 +112,32 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} - 1 {_col1} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 0 Reducer 4 - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), UDFToInteger(_col3) (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {_col1} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 0 Reducer 2 + Statistics: 
Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), UDFToInteger(_col3) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 Stage: Stage-0 Move Operator diff --git a/ql/src/test/results/clientpositive/spark/join3.q.out b/ql/src/test/results/clientpositive/spark/join3.q.out index 8f092fc..77e3479 100644 --- a/ql/src/test/results/clientpositive/spark/join3.q.out +++ b/ql/src/test/results/clientpositive/spark/join3.q.out @@ -29,7 +29,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: src2 @@ -48,7 +48,7 @@ STAGE PLANS: 2 key (type: string) Local Work: Map Reduce Local Work - Map 2 + Map 3 Map Operator Tree: TableScan alias: src3 @@ -72,7 +72,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: src1 @@ -94,8 +94,8 @@ STAGE PLANS: 2 key (type: string) outputColumnNames: _col0, _col11 input vertices: - 1 Map 1 - 2 Map 2 + 1 Map 2 + 2 Map 3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), _col11 (type: string) diff --git a/ql/src/test/results/clientpositive/spark/join30.q.out b/ql/src/test/results/clientpositive/spark/join30.q.out index 5def6dc..f6d2117 100644 --- a/ql/src/test/results/clientpositive/spark/join30.q.out +++ b/ql/src/test/results/clientpositive/spark/join30.q.out @@ -29,7 +29,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: x @@ -50,10 +50,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: y @@ -72,27 +72,23 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0 input vertices: - 0 Map 3 + 0 Map 1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Local Work: Map Reduce Local Work - Reducer 2 + Reducer 3 Reduce Operator Tree: Group By 
Operator aggregations: count(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/spark/join31.q.out b/ql/src/test/results/clientpositive/spark/join31.q.out index 0bde482..4085a98 100644 --- a/ql/src/test/results/clientpositive/spark/join31.q.out +++ b/ql/src/test/results/clientpositive/spark/join31.q.out @@ -38,10 +38,10 @@ STAGE PLANS: Stage: Stage-3 Spark Edges: - Reducer 5 <- Map 4 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 4 + Map 1 Map Operator Tree: TableScan alias: x @@ -49,23 +49,19 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Group By Operator + aggregations: count(1) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Reducer 5 + value expressions: _col1 (type: bigint) + Reducer 2 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -90,11 +86,11 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 3 (GROUP, 1) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: y @@ -102,23 +98,19 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Group By Operator + aggregations: count(1) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Reducer 2 + value expressions: _col1 (type: bigint) + Reducer 4 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -143,25 +135,21 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0 input vertices: - 0 Reducer 5 + 0 Reducer 2 Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + 
outputColumnNames: _col0, _col1 Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Reducer 3 + value expressions: _col1 (type: bigint) + Reducer 5 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/spark/join32.q.out b/ql/src/test/results/clientpositive/spark/join32.q.out index 092ad82..a2831f8 100644 --- a/ql/src/test/results/clientpositive/spark/join32.q.out +++ b/ql/src/test/results/clientpositive/spark/join32.q.out @@ -113,21 +113,21 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: z - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: value is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {_col0} {_col6} - 1 + 0 {value} + 1 {value} keys: - 0 _col1 (type: string) - 1 value (type: string) - Position of Big Table: 0 + 0 key (type: string) + 1 key (type: string) + Position of Big Table: 1 Local Work: Map Reduce Local Work Path -> Alias: @@ -135,12 +135,9 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: src1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -148,57 +145,58 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src1 numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src1 + numFiles 1 + 
numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src1 + name: default.src1 Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [z] + /src1 [x] Map 3 Map Operator Tree: TableScan - alias: x - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: z + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + predicate: value is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {value} - 1 {value} + 0 {_col0} {_col6} + 1 keys: - 0 key (type: string) - 1 key (type: string) - Position of Big Table: 1 + 0 _col1 (type: string) + 1 value (type: string) + Position of Big Table: 0 Local Work: Map Reduce Local Work Path -> Alias: @@ -206,9 +204,12 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: src1 + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -216,40 +217,39 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.srcpart numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src1 - numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src1 - name: default.src1 + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: - /src1 [x] + /srcpart/ds=2008-04-08/hr=11 [z] Stage: Stage-1 Spark @@ -276,7 +276,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col1, _col6 input vertices: - 0 Map 3 + 0 Map 1 Position of Big Table: 1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE 
Map Join Operator @@ -290,7 +290,7 @@ STAGE PLANS: 1 value (type: string) outputColumnNames: _col0, _col6, _col11 input vertices: - 1 Map 1 + 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -421,8 +421,8 @@ POSTHOOK: Input: default@src1 POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Output: default@dest_j1 -POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.val2 EXPRESSION [(src)y.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src1)x.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest_j1.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: select * from dest_j1 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out b/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out index 7e31c03..c6af610 100644 --- a/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out +++ b/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out @@ -121,21 +121,21 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: z - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: value is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {_col0} {_col6} - 1 + 0 {value} + 1 {value} keys: - 0 _col1 (type: string) - 1 value (type: string) - Position of Big Table: 0 + 0 key (type: string) + 1 key (type: string) + Position of Big Table: 1 Local Work: Map Reduce Local Work Path -> Alias: @@ -143,12 +143,9 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: src1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -156,57 +153,58 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src1 numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments defaultdefault columns.types string:string #### A 
masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src1 + numFiles 1 + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src1 + name: default.src1 Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [z] + /src1 [x] Map 3 Map Operator Tree: TableScan - alias: x - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: z + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + predicate: value is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {value} - 1 {value} + 0 {_col0} {_col6} + 1 keys: - 0 key (type: string) - 1 key (type: string) - Position of Big Table: 1 + 0 _col1 (type: string) + 1 value (type: string) + Position of Big Table: 0 Local Work: Map Reduce Local Work Path -> Alias: @@ -214,9 +212,12 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: src1 + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -224,40 +225,39 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.srcpart numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src1 - numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src1 - name: default.src1 + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: - /src1 [x] + /srcpart/ds=2008-04-08/hr=11 [z] Stage: Stage-1 Spark @@ -284,7 +284,7 @@ 
STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col1, _col6 input vertices: - 0 Map 3 + 0 Map 1 Position of Big Table: 1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -298,7 +298,7 @@ STAGE PLANS: 1 value (type: string) outputColumnNames: _col0, _col6, _col11 input vertices: - 1 Map 1 + 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -429,8 +429,8 @@ POSTHOOK: Input: default@src1 POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Output: default@dest_j1 -POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.val2 EXPRESSION [(src)y.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src1)x.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest_j1.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: select * from dest_j1 PREHOOK: type: QUERY @@ -626,22 +626,20 @@ STAGE PLANS: Map 2 Map Operator Tree: TableScan - alias: z + alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + predicate: (value is not null and key is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {_col5} - 1 {value} - 2 {value} + 0 + 1 {key} keys: - 0 _col5 (type: string) - 1 key (type: string) - 2 key (type: string) + 0 value (type: string) + 1 value (type: string) Position of Big Table: 0 Local Work: Map Reduce Local Work @@ -693,7 +691,7 @@ STAGE PLANS: name: default.src1 name: default.src1 Truncated Path -> Alias: - /src1 [z] + /src1 [x] Map 3 Map Operator Tree: TableScan @@ -768,20 +766,22 @@ STAGE PLANS: Map 4 Map Operator Tree: TableScan - alias: x + alias: z Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (value is not null and key is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + predicate: key is not null (type: boolean) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 - 1 {key} + 0 {_col5} + 1 {value} + 2 {value} keys: - 0 value (type: string) - 1 value (type: string) + 0 _col5 (type: string) + 1 key (type: string) + 2 key (type: string) Position of Big Table: 0 Local Work: Map Reduce Local Work @@ -833,7 +833,7 @@ STAGE PLANS: name: default.src1 name: default.src1 Truncated Path -> Alias: - /src1 [x] + /src1 [z] Stage: Stage-1 Spark @@ -860,7 +860,7 @@ STAGE PLANS: 1 value (type: string) outputColumnNames: _col5 input vertices: - 1 Map 4 + 1 Map 2 Position of Big Table: 0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -878,7 +878,7 @@ STAGE PLANS: outputColumnNames: _col5, _col11, _col16 input vertices: 1 Map 3 - 2 Map 2 + 2 Map 4 Position of Big Table: 0 Statistics: Num rows: 605 
Data size: 6426 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -1228,7 +1228,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: x @@ -1373,7 +1373,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: y @@ -1394,57 +1394,53 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col1 input vertices: - 0 Map 2 + 0 Map 1 Position of Big Table: 1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} - 1 {value} - keys: - 0 _col1 (type: string) - 1 value (type: string) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 1 Map 3 - Position of Big Table: 0 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {value} + keys: + 0 _col1 (type: string) + 1 value (type: string) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 1 Map 3 + Position of Big Table: 0 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col3 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col3 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,val2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.dest_j2 - serialization.ddl struct dest_j2 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j2 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value,val2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.dest_j2 + serialization.ddl struct dest_j2 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j2 + TotalFiles: 1 + 
GatherStats: true + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -1543,8 +1539,8 @@ POSTHOOK: Input: default@src1 POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Output: default@dest_j2 -POSTHOOK: Lineage: dest_j2.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j2.val2 SIMPLE [(src1)x.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j2.key EXPRESSION [(src1)x.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j2.val2 EXPRESSION [(src1)x.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest_j2.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: select * from dest_j2 PREHOOK: type: QUERY @@ -1754,7 +1750,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: y @@ -1895,7 +1891,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: x @@ -1916,62 +1912,58 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col1 input vertices: - 1 Map 1 + 1 Map 2 Position of Big Table: 0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} - 1 {value} - keys: - 0 _col1 (type: string) - 1 value (type: string) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 1 Map 3 - Position of Big Table: 0 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {value} + keys: + 0 _col1 (type: string) + 1 value (type: string) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 1 Map 3 + Position of Big Table: 0 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col3 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col3 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value,val2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.dest_j2 - numFiles 1 - numRows 85 - rawDataSize 1600 - serialization.ddl struct dest_j2 { string key, string value, string val2} - serialization.format 1 - serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 1685 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j2 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value,val2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.dest_j2 + numFiles 1 + numRows 85 + rawDataSize 1600 + serialization.ddl struct dest_j2 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 1685 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j2 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -2075,8 +2067,8 @@ POSTHOOK: Input: default@src1 POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Output: default@dest_j2 -POSTHOOK: Lineage: dest_j2.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j2.val2 SIMPLE [(src1)x.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j2.key EXPRESSION [(src1)x.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j2.val2 EXPRESSION [(src1)x.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest_j2.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: select * from dest_j2 PREHOOK: type: QUERY @@ -2206,7 +2198,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: x @@ -2245,7 +2237,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: y @@ -2264,37 +2256,33 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col1 input vertices: - 0 Map 2 + 0 Map 1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} - 1 {value} - keys: - 0 _col1 (type: string) - 1 value (type: string) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 1 Map 3 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {value} + keys: + 0 _col1 (type: string) + 1 value (type: string) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 1 Map 3 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col3 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col3 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false Statistics: Num rows: 302 Data size: 
3213 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j2 Local Work: Map Reduce Local Work @@ -2331,8 +2319,8 @@ POSTHOOK: Input: default@src1 POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Output: default@dest_j2 -POSTHOOK: Lineage: dest_j2.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j2.val2 SIMPLE [(src1)x.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j2.key EXPRESSION [(src1)x.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j2.val2 EXPRESSION [(src1)x.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest_j2.value SIMPLE [(srcpart)x.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: select * from dest_j2 PREHOOK: type: QUERY @@ -2450,7 +2438,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: x @@ -2489,7 +2477,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: y @@ -2508,37 +2496,33 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col1 input vertices: - 0 Map 2 + 0 Map 1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} - 1 {value} - keys: - 0 _col1 (type: string) - 1 value (type: string) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 1 Map 3 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {value} + keys: + 0 _col1 (type: string) + 1 value (type: string) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 1 Map 3 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col3 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col3 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j2 + table: + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j2 Local Work: Map Reduce Local Work @@ -2575,8 +2559,8 @@ POSTHOOK: Input: default@src1 POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Output: default@dest_j2 -POSTHOOK: Lineage: dest_j2.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j2.val2 SIMPLE [(src1)x.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j2.key EXPRESSION [(src1)x.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j2.val2 EXPRESSION [(src1)x.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest_j2.value SIMPLE [(srcpart)y.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: select * from dest_j2 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/join33.q.out b/ql/src/test/results/clientpositive/spark/join33.q.out index 092ad82..a2831f8 100644 --- a/ql/src/test/results/clientpositive/spark/join33.q.out +++ b/ql/src/test/results/clientpositive/spark/join33.q.out @@ -113,21 +113,21 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: z - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: value is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {_col0} {_col6} - 1 + 0 {value} + 1 {value} keys: - 0 _col1 (type: string) - 1 value (type: string) - Position of Big Table: 0 + 0 key (type: string) + 1 key (type: string) + Position of Big Table: 1 Local Work: Map Reduce Local Work Path -> Alias: @@ -135,12 +135,9 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: src1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -148,57 +145,58 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src1 numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns 
ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src1 + numFiles 1 + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src1 + name: default.src1 Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [z] + /src1 [x] Map 3 Map Operator Tree: TableScan - alias: x - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: z + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + predicate: value is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {value} - 1 {value} + 0 {_col0} {_col6} + 1 keys: - 0 key (type: string) - 1 key (type: string) - Position of Big Table: 1 + 0 _col1 (type: string) + 1 value (type: string) + Position of Big Table: 0 Local Work: Map Reduce Local Work Path -> Alias: @@ -206,9 +204,12 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: src1 + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -216,40 +217,39 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.srcpart numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src1 - numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src1 - name: default.src1 + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: - /src1 [x] + /srcpart/ds=2008-04-08/hr=11 [z] Stage: Stage-1 Spark @@ -276,7 +276,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col1, _col6 
input vertices: - 0 Map 3 + 0 Map 1 Position of Big Table: 1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -290,7 +290,7 @@ STAGE PLANS: 1 value (type: string) outputColumnNames: _col0, _col6, _col11 input vertices: - 1 Map 1 + 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -421,8 +421,8 @@ POSTHOOK: Input: default@src1 POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Output: default@dest_j1 -POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.val2 EXPRESSION [(src)y.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src1)x.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest_j1.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: select * from dest_j1 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/join35.q.out b/ql/src/test/results/clientpositive/spark/join35.q.out index 09e6781..851a981 100644 --- a/ql/src/test/results/clientpositive/spark/join35.q.out +++ b/ql/src/test/results/clientpositive/spark/join35.q.out @@ -171,24 +171,20 @@ STAGE PLANS: isSamplingPred: false predicate: (key < 20) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Group By Operator + aggregations: count(1) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col1 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -248,24 +244,20 @@ STAGE PLANS: isSamplingPred: false predicate: (key > 100) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Group By Operator + aggregations: count(1) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - 
sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col1 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -390,16 +382,13 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - tag: 0 - value expressions: _col1 (type: bigint) - auto parallelism: false + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + tag: 0 + value expressions: _col1 (type: bigint) + auto parallelism: false Reducer 4 Needs Tagging: true Reduce Operator Tree: @@ -449,16 +438,13 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - tag: 0 - value expressions: _col1 (type: bigint) - auto parallelism: false + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + tag: 0 + value expressions: _col1 (type: bigint) + auto parallelism: false Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/spark/join36.q.out b/ql/src/test/results/clientpositive/spark/join36.q.out index f6df972..a65e979 100644 --- a/ql/src/test/results/clientpositive/spark/join36.q.out +++ b/ql/src/test/results/clientpositive/spark/join36.q.out @@ -71,7 +71,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: y @@ -93,7 +93,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: x @@ -112,7 +112,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1, _col6 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 170 Data size: 817 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int), _col6 (type: int) diff --git a/ql/src/test/results/clientpositive/spark/join37.q.out b/ql/src/test/results/clientpositive/spark/join37.q.out index 33962b5..bdb9aa6 100644 --- a/ql/src/test/results/clientpositive/spark/join37.q.out +++ b/ql/src/test/results/clientpositive/spark/join37.q.out @@ -31,7 +31,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: x @@ -53,7 +53,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: y @@ -72,7 +72,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col1, _col6 input vertices: - 0 Map 2 + 0 Map 1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), _col6 (type: string) diff --git a/ql/src/test/results/clientpositive/spark/join38.q.out 
b/ql/src/test/results/clientpositive/spark/join38.q.out index a511b03..735d7ea 100644 --- a/ql/src/test/results/clientpositive/spark/join38.q.out +++ b/ql/src/test/results/clientpositive/spark/join38.q.out @@ -58,7 +58,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: b @@ -79,10 +79,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 3) + Reducer 2 <- Map 1 (GROUP, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -101,27 +101,23 @@ STAGE PLANS: 1 '111' (type: string) outputColumnNames: _col1, _col10 input vertices: - 1 Map 1 + 1 Map 3 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col10 (type: string) - outputColumnNames: _col1, _col10 + Group By Operator + aggregations: count(1) + keys: _col1 (type: string), _col10 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col1 (type: string), _col10 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: bigint) + value expressions: _col2 (type: bigint) Local Work: Map Reduce Local Work - Reducer 3 + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/spark/join39.q.out b/ql/src/test/results/clientpositive/spark/join39.q.out index e78e6b9..416c92e 100644 --- a/ql/src/test/results/clientpositive/spark/join39.q.out +++ b/ql/src/test/results/clientpositive/spark/join39.q.out @@ -31,7 +31,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src @@ -57,7 +57,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: x @@ -73,7 +73,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 1 Map 2 + 1 Map 1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) diff --git a/ql/src/test/results/clientpositive/spark/join40.q.out b/ql/src/test/results/clientpositive/spark/join40.q.out index a4a0dc6..b054ad8 100644 --- a/ql/src/test/results/clientpositive/spark/join40.q.out +++ b/ql/src/test/results/clientpositive/spark/join40.q.out @@ -18,7 +18,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src @@ -44,7 +44,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: x @@ -60,7 +60,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col5, _col6 input 
vertices: - 1 Map 2 + 1 Map 1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) @@ -674,7 +674,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: src2 @@ -696,7 +696,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src1 @@ -715,7 +715,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col6 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col6 (type: string) @@ -1796,15 +1796,15 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key < 10) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {key} {value} - 1 {value} + 0 {value} + 1 {key} {value} 2 {key} {value} filter predicates: 0 @@ -1816,18 +1816,18 @@ STAGE PLANS: 2 key (type: string) Local Work: Map Reduce Local Work - Map 4 + Map 2 Map Operator Tree: TableScan - alias: src1 + alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key < 10) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {value} - 1 {key} {value} + 0 {key} {value} + 1 {value} 2 {key} {value} filter predicates: 0 @@ -1843,10 +1843,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (SORT, 3) + Reducer 4 <- Map 3 (SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: src3 @@ -1869,8 +1869,8 @@ STAGE PLANS: 2 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: - 0 Map 4 - 1 Map 1 + 0 Map 1 + 1 Map 2 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) @@ -1882,7 +1882,7 @@ STAGE PLANS: Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Reducer 3 + Reducer 4 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) @@ -2481,15 +2481,15 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key < 10) and (key < 15)) (type: boolean) + predicate: ((key < 15) and (key < 10)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {key} {value} - 1 {value} + 0 {value} + 1 {key} {value} 2 {key} {value} filter predicates: 0 @@ -2501,18 +2501,18 @@ STAGE PLANS: 2 key (type: string) Local Work: Map Reduce Local Work - Map 4 + Map 2 Map Operator Tree: 
TableScan - alias: src1 + alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key < 15) and (key < 10)) (type: boolean) + predicate: ((key < 10) and (key < 15)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {value} - 1 {key} {value} + 0 {key} {value} + 1 {value} 2 {key} {value} filter predicates: 0 @@ -2528,10 +2528,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (SORT, 3) + Reducer 4 <- Map 3 (SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: src3 @@ -2554,8 +2554,8 @@ STAGE PLANS: 2 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: - 0 Map 4 - 1 Map 1 + 0 Map 1 + 1 Map 2 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) @@ -2567,7 +2567,7 @@ STAGE PLANS: Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Reducer 3 + Reducer 4 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) @@ -3163,7 +3163,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src @@ -3189,7 +3189,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: x @@ -3205,7 +3205,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 1 Map 2 + 1 Map 1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) @@ -3819,7 +3819,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: b @@ -3840,10 +3840,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -3861,22 +3861,20 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) input vertices: - 1 Map 1 + 1 Map 3 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work - Reducer 3 + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: 
count(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/spark/join41.q.out b/ql/src/test/results/clientpositive/spark/join41.q.out index a61c3c5..0cea03a 100644 --- a/ql/src/test/results/clientpositive/spark/join41.q.out +++ b/ql/src/test/results/clientpositive/spark/join41.q.out @@ -24,7 +24,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: src2 @@ -46,7 +46,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src1 @@ -62,7 +62,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) @@ -113,7 +113,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: src2 @@ -135,7 +135,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src1 @@ -151,7 +151,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) diff --git a/ql/src/test/results/clientpositive/spark/join9.q.out b/ql/src/test/results/clientpositive/spark/join9.q.out index f2c915f..620a487 100644 --- a/ql/src/test/results/clientpositive/spark/join9.q.out +++ b/ql/src/test/results/clientpositive/spark/join9.q.out @@ -83,7 +83,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: src2 @@ -157,7 +157,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src1 @@ -178,7 +178,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col8 input vertices: - 1 Map 1 + 1 Map 2 Position of Big Table: 0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator diff --git a/ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out b/ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out index 0066507..5426e15 100644 --- a/ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out +++ b/ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out @@ -14,7 +14,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: p2 @@ -33,7 +33,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: p1 @@ -49,7 +49,7 @@ STAGE PLANS: 1 outputColumnNames: _col1, _col13 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string), _col13 (type: string) @@ -89,10 +89,10 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: p3 + alias: p2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_name is not null (type: boolean) @@ -100,18 +100,18 @@ STAGE PLANS: Spark HashTable Sink Operator condition expressions: 0 {p_name} - 
1 {p_name} - 2 + 1 + 2 {p_name} keys: 0 p_name (type: string) 1 p_name (type: string) 2 p_name (type: string) Local Work: Map Reduce Local Work - Map 2 + Map 3 Map Operator Tree: TableScan - alias: p2 + alias: p3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_name is not null (type: boolean) @@ -119,8 +119,8 @@ STAGE PLANS: Spark HashTable Sink Operator condition expressions: 0 {p_name} - 1 - 2 {p_name} + 1 {p_name} + 2 keys: 0 p_name (type: string) 1 p_name (type: string) @@ -132,7 +132,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: p1 @@ -155,7 +155,7 @@ STAGE PLANS: outputColumnNames: _col1, _col13, _col25 input vertices: 1 Map 2 - 2 Map 1 + 2 Map 3 Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((_col1 = _col13) and (_col13 = _col25)) (type: boolean) @@ -221,7 +221,7 @@ STAGE PLANS: 2 p_name (type: string) Local Work: Map Reduce Local Work - Map 2 + Map 3 Map Operator Tree: TableScan alias: p3 @@ -245,7 +245,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 2 Map Operator Tree: TableScan alias: p1 @@ -268,7 +268,7 @@ STAGE PLANS: outputColumnNames: _col1, _col12, _col14 input vertices: 1 Map 1 - 2 Map 2 + 2 Map 3 Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((_col1 = _col12) and (_col12 = _col14)) (type: boolean) @@ -334,7 +334,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: p1 @@ -369,7 +369,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: p3 @@ -388,7 +388,7 @@ STAGE PLANS: 1 p_name (type: string) outputColumnNames: _col0, _col1, _col12, _col13, _col25 input vertices: - 0 Map 3 + 0 Map 1 Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (((_col12 + _col0) = _col0) and (_col25 = _col13)) (type: boolean) @@ -434,7 +434,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 2 Map Operator Tree: TableScan alias: p2 @@ -459,23 +459,6 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: p4 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_partkey is not null (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} {_col12} {_col13} {_col25} - 1 {p_name} - keys: - 0 _col0 (type: int) - 1 p_partkey (type: int) - Local Work: - Map Reduce Local Work - Map 4 - Map Operator Tree: - TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -492,7 +475,7 @@ STAGE PLANS: 1 p_name (type: string), p_partkey (type: int) outputColumnNames: _col0, _col1, _col12, _col13 input vertices: - 1 Map 3 + 1 Map 2 Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: @@ -503,12 +486,29 @@ STAGE PLANS: 1 p_name (type: string) Local Work: Map Reduce Local Work + Map 4 + Map Operator Tree: + TableScan + alias: p4 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) 
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} {_col12} {_col13} {_col25} + 1 {p_name} + keys: + 0 _col0 (type: int) + 1 p_partkey (type: int) + Local Work: + Map Reduce Local Work Stage: Stage-1 Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: p3 @@ -527,7 +527,7 @@ STAGE PLANS: 1 p_name (type: string) outputColumnNames: _col0, _col1, _col12, _col13, _col25 input vertices: - 0 Map 4 + 0 Map 1 Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -540,7 +540,7 @@ STAGE PLANS: 1 p_partkey (type: int) outputColumnNames: _col0, _col1, _col12, _col13, _col25, _col36, _col37 input vertices: - 1 Map 1 + 1 Map 4 Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (((_col13 = _col25) and (_col0 = _col36)) and (_col0 = _col12)) (type: boolean) @@ -586,7 +586,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 2 Map Operator Tree: TableScan alias: p2 @@ -611,23 +611,6 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: p4 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_partkey is not null (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} {_col12} {_col13} {_col25} - 1 {p_name} - keys: - 0 _col0 (type: int) - 1 p_partkey (type: int) - Local Work: - Map Reduce Local Work - Map 4 - Map Operator Tree: - TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -644,7 +627,7 @@ STAGE PLANS: 1 p_name (type: string), p_partkey (type: int) outputColumnNames: _col0, _col1, _col12, _col13 input vertices: - 1 Map 3 + 1 Map 2 Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: @@ -655,12 +638,29 @@ STAGE PLANS: 1 p_name (type: string) Local Work: Map Reduce Local Work + Map 4 + Map Operator Tree: + TableScan + alias: p4 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} {_col12} {_col13} {_col25} + 1 {p_name} + keys: + 0 _col0 (type: int) + 1 p_partkey (type: int) + Local Work: + Map Reduce Local Work Stage: Stage-1 Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: p3 @@ -679,7 +679,7 @@ STAGE PLANS: 1 p_name (type: string) outputColumnNames: _col0, _col1, _col12, _col13, _col25 input vertices: - 0 Map 4 + 0 Map 1 Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -692,7 +692,7 @@ STAGE PLANS: 1 p_partkey (type: int) outputColumnNames: _col0, _col1, _col12, _col13, _col25, _col36, _col37 input vertices: - 1 Map 1 + 1 Map 4 Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (((_col13 = _col25) and (_col0 = _col36)) and (_col0 = _col12)) (type: boolean) diff --git 
a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_1.q.out b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_1.q.out index acc8e50..b7b6b4a 100644 --- a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_1.q.out +++ b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_1.q.out @@ -14,10 +14,10 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: p3 + alias: p2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_name is not null (type: boolean) @@ -25,18 +25,18 @@ STAGE PLANS: Spark HashTable Sink Operator condition expressions: 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 2 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 2 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} keys: 0 p_name (type: string) 1 p_name (type: string) 2 p_name (type: string) Local Work: Map Reduce Local Work - Map 2 + Map 3 Map Operator Tree: TableScan - alias: p2 + alias: p3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_name is not null (type: boolean) @@ -44,8 +44,8 @@ STAGE PLANS: Spark HashTable Sink Operator condition expressions: 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 2 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 2 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} keys: 0 p_name (type: string) 1 p_name (type: string) @@ -57,7 +57,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: p1 @@ -80,7 +80,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 input vertices: 1 Map 2 - 2 Map 1 + 2 Map 3 Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) @@ -118,10 +118,10 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: p3 + alias: p2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter 
Operator predicate: p_name is not null (type: boolean) @@ -129,18 +129,18 @@ STAGE PLANS: Spark HashTable Sink Operator condition expressions: 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 2 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 2 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} keys: 0 p_name (type: string) 1 p_name (type: string) 2 p_name (type: string) Local Work: Map Reduce Local Work - Map 2 + Map 3 Map Operator Tree: TableScan - alias: p2 + alias: p3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_name is not null (type: boolean) @@ -148,8 +148,8 @@ STAGE PLANS: Spark HashTable Sink Operator condition expressions: 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 2 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 2 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} keys: 0 p_name (type: string) 1 p_name (type: string) @@ -161,7 +161,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: p1 @@ -184,7 +184,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 input vertices: 1 Map 2 - 2 Map 1 + 2 Map 3 Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) @@ -245,7 +245,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: p1 @@ -280,7 +280,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: p3 @@ -299,7 +299,7 @@ STAGE PLANS: 1 p_name (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 input vertices: - 0 Map 3 + 0 Map 1 Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 
(type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) @@ -337,38 +337,38 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: p3 + alias: p2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_name is not null (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + predicate: ((p_partkey = 1) and p_name is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} - 1 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} keys: - 0 _col13 (type: string) - 1 p_name (type: string) + 0 + 1 Local Work: Map Reduce Local Work - Map 2 + Map 3 Map Operator Tree: TableScan - alias: p2 + alias: p3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((p_partkey = 1) and p_name is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} + 1 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} keys: - 0 - 1 + 0 _col13 (type: string) + 1 p_name (type: string) Local Work: Map Reduce Local Work @@ -376,7 +376,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: p1 @@ -405,7 +405,7 @@ STAGE PLANS: 1 p_name (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 input vertices: - 1 Map 1 + 1 Map 3 Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), 1 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), 
_col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) diff --git a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_2.q.out b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_2.q.out index 704f305..5892ce9 100644 --- a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_2.q.out +++ b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_2.q.out @@ -14,10 +14,10 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: p4 + alias: p2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_name is not null (type: boolean) @@ -25,9 +25,9 @@ STAGE PLANS: Spark HashTable Sink Operator condition expressions: 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} 2 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 3 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 3 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} keys: 0 p_name (type: string) 1 p_name (type: string) @@ -35,7 +35,7 @@ STAGE PLANS: 3 p_name (type: string) Local Work: Map Reduce Local Work - Map 2 + Map 3 Map Operator Tree: TableScan alias: p3 @@ -56,10 +56,10 @@ STAGE PLANS: 3 p_name (type: string) Local Work: Map Reduce Local Work - Map 3 + Map 4 Map Operator Tree: TableScan - alias: p2 + alias: p4 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_name is not null (type: boolean) @@ -67,9 +67,9 @@ STAGE PLANS: Spark HashTable Sink Operator condition expressions: 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} 2 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 3 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 3 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} keys: 0 p_name (type: string) 1 p_name (type: string) @@ -82,7 +82,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 4 + Map 1 Map Operator Tree: TableScan alias: p1 @@ -107,9 +107,9 @@ STAGE PLANS: 3 p_name (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44 input vertices: - 1 Map 3 - 2 Map 2 - 3 Map 1 + 1 Map 2 + 2 Map 3 + 3 Map 4 Statistics: Num rows: 42 Data size: 5190 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 
(type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string), _col36 (type: int), _col37 (type: string), _col38 (type: string), _col39 (type: string), _col40 (type: string), _col41 (type: int), _col42 (type: string), _col43 (type: double), _col44 (type: string) @@ -150,7 +150,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 2 Map Operator Tree: TableScan alias: p2 @@ -175,23 +175,6 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: p4 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_partkey is not null (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col12} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} {_col24} {_col25} {_col26} {_col27} {_col28} {_col29} {_col30} {_col31} {_col32} - 1 {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - keys: - 0 _col0 (type: int) - 1 p_partkey (type: int) - Local Work: - Map Reduce Local Work - Map 4 - Map Operator Tree: - TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -208,7 +191,7 @@ STAGE PLANS: 1 p_name (type: string), p_partkey (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 input vertices: - 1 Map 3 + 1 Map 2 Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: @@ -219,12 +202,29 @@ STAGE PLANS: 1 p_name (type: string) Local Work: Map Reduce Local Work + Map 4 + Map Operator Tree: + TableScan + alias: p4 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col12} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} {_col24} {_col25} {_col26} {_col27} {_col28} {_col29} {_col30} {_col31} {_col32} + 1 {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + keys: + 0 _col0 (type: int) + 1 p_partkey (type: int) + Local Work: + Map Reduce Local Work Stage: Stage-1 Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: p3 @@ -243,7 +243,7 @@ STAGE PLANS: 1 p_name (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 input vertices: - 0 Map 4 + 0 Map 1 Statistics: 
Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -256,7 +256,7 @@ STAGE PLANS: 1 p_partkey (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44 input vertices: - 1 Map 1 + 1 Map 4 Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string), _col36 (type: int), _col37 (type: string), _col38 (type: string), _col39 (type: string), _col40 (type: string), _col41 (type: int), _col42 (type: string), _col43 (type: double), _col44 (type: string) diff --git a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_3.q.out b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_3.q.out index 22ca2dd..34932a9 100644 --- a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_3.q.out +++ b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_3.q.out @@ -16,10 +16,10 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: p3 + alias: p2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_name is not null (type: boolean) @@ -27,18 +27,18 @@ STAGE PLANS: Spark HashTable Sink Operator condition expressions: 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 2 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 2 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} keys: 0 p_name (type: string) 1 p_name (type: string) 2 p_name (type: string) Local Work: Map Reduce Local Work - Map 2 + Map 3 Map Operator Tree: TableScan - alias: p2 + alias: p3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_name is not null (type: boolean) @@ -46,8 +46,8 @@ STAGE PLANS: Spark HashTable Sink Operator condition expressions: 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 2 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 2 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} keys: 0 p_name (type: string) 1 
p_name (type: string) @@ -59,7 +59,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: p1 @@ -82,7 +82,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 input vertices: 1 Map 2 - 2 Map 1 + 2 Map 3 Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((_col1 = _col13) and (_col13 = _col25)) (type: boolean) @@ -125,10 +125,10 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: p3 + alias: p2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_name is not null (type: boolean) @@ -136,18 +136,18 @@ STAGE PLANS: Spark HashTable Sink Operator condition expressions: 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 2 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 2 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} keys: 0 p_name (type: string) 1 p_name (type: string) 2 p_name (type: string) Local Work: Map Reduce Local Work - Map 2 + Map 3 Map Operator Tree: TableScan - alias: p2 + alias: p3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_name is not null (type: boolean) @@ -155,8 +155,8 @@ STAGE PLANS: Spark HashTable Sink Operator condition expressions: 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 2 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} + 2 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} keys: 0 p_name (type: string) 1 p_name (type: string) @@ -168,7 +168,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: p1 @@ -191,7 +191,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 input vertices: 1 Map 2 - 2 Map 1 + 2 Map 3 Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((_col13 = _col1) and (_col25 = _col13)) (type: boolean) @@ -257,7 +257,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: p1 @@ -292,7 +292,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: p3 @@ -311,7 +311,7 @@ STAGE PLANS: 1 p_name (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, 
_col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32
                      input vertices:
-                        0 Map 3
+                        0 Map 1
                      Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
                      Filter Operator
                        predicate: (((_col12 + _col0) = _col0) and (_col25 = _col13)) (type: boolean)
@@ -354,38 +354,38 @@ STAGE PLANS:
     Spark
 #### A masked pattern was here ####
       Vertices:
-        Map 1
+        Map 2
            Map Operator Tree:
               TableScan
-                alias: p3
+                alias: p2
                 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
                 Filter Operator
-                  predicate: p_name is not null (type: boolean)
-                  Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+                  predicate: (p_name is not null and (p_partkey = 1)) (type: boolean)
+                  Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE
                   Spark HashTable Sink Operator
                     condition expressions:
-                      0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20}
-                      1 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment}
+                      0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment}
+                      1 {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment}
                     keys:
-                      0 _col13 (type: string)
-                      1 p_name (type: string)
+                      0
+                      1
            Local Work:
              Map Reduce Local Work
-        Map 2
+        Map 3
            Map Operator Tree:
               TableScan
-                alias: p2
+                alias: p3
                 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
                 Filter Operator
-                  predicate: (p_name is not null and (p_partkey = 1)) (type: boolean)
-                  Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE
+                  predicate: p_name is not null (type: boolean)
+                  Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
                   Spark HashTable Sink Operator
                     condition expressions:
-                      0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment}
-                      1 {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment}
+                      0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20}
+                      1 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment}
                     keys:
-                      0
-                      1
+                      0 _col13 (type: string)
+                      1 p_name (type: string)
            Local Work:
              Map Reduce Local Work
@@ -393,7 +393,7 @@ STAGE PLANS:
     Spark
 #### A masked pattern was here ####
       Vertices:
-        Map 3
+        Map 1
            Map Operator Tree:
               TableScan
                 alias: p1
@@ -422,7 +422,7 @@
                        1 p_name (type: string)
                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32
                      input vertices:
-                        1 Map 1
+                        1 Map 3
                      Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE
                      Filter Operator
                        predicate: (_col25 = _col13) (type: boolean)
diff --git a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_4.q.out b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_4.q.out
index 6f6223d..050291b 100644
--- a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_4.q.out
+++ b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_4.q.out
@@ -16,10 +16,10 @@ STAGE PLANS:
     Spark
 #### A masked pattern was here ####
       Vertices:
-        Map 1
+        Map 2
            Map Operator Tree:
               TableScan
-                alias: p4
+                alias: p2
                 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
                 Filter Operator
                   predicate: p_name is not null (type: boolean)
@@ -27,9 +27,9 @@ STAGE PLANS:
                   Spark HashTable Sink Operator
                     condition expressions:
                       0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment}
-                      1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment}
+                      1 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment}
                       2 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment}
-                      3 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment}
+                      3 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment}
                     keys:
                       0 p_name (type: string)
                       1 p_name (type: string)
@@ -37,7 +37,7 @@ STAGE PLANS:
                       3 p_name (type: string)
            Local Work:
              Map Reduce Local Work
-        Map 2
+        Map 3
            Map Operator Tree:
               TableScan
                 alias: p3
@@ -58,10 +58,10 @@ STAGE PLANS:
                       3 p_name (type: string)
            Local Work:
              Map Reduce Local Work
-        Map 3
+        Map 4
            Map Operator Tree:
               TableScan
-                alias: p2
+                alias: p4
                 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
                 Filter Operator
                   predicate: p_name is not null (type: boolean)
@@ -69,9 +69,9 @@ STAGE PLANS:
                   Spark HashTable Sink Operator
                     condition expressions:
                       0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment}
-                      1 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment}
+                      1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment}
                       2 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment}
-                      3 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment}
+                      3 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment}
                     keys:
                       0 p_name (type: string)
                       1 p_name (type: string)
@@ -84,7 +84,7 @@ STAGE PLANS:
     Spark
 #### A masked pattern was here ####
       Vertices:
-        Map 4
+        Map 1
            Map Operator Tree:
               TableScan
                 alias: p1
@@ -109,9 +109,9 @@ STAGE PLANS:
                      3 p_name (type: string)
                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44
                    input vertices:
-                      1 Map 3
-                      2 Map 2
-                      3 Map 1
+                      1 Map 2
+                      2 Map 3
+                      3 Map 4
                    Statistics: Num rows: 42 Data size: 5190 Basic stats: COMPLETE Column stats: NONE
                    Filter Operator
                      predicate: ((_col13 = _col25) and (_col1 = _col37)) (type: boolean)
@@ -157,7 +157,7 @@ STAGE PLANS:
     Spark
 #### A masked pattern was here ####
       Vertices:
-        Map 3
+        Map 2
            Map Operator Tree:
               TableScan
                 alias: p2
@@ -182,23 +182,6 @@ STAGE PLANS:
         Map 1
            Map Operator Tree:
               TableScan
-                alias: p4
-                Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
-                Filter Operator
-                  predicate: p_partkey is not null (type: boolean)
-                  Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
-                  Spark HashTable Sink Operator
-                    condition expressions:
-                      0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col12} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} {_col24} {_col25} {_col26} {_col27} {_col28} {_col29} {_col30} {_col31} {_col32}
-                      1 {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment}
-                    keys:
-                      0 _col0 (type: int)
-                      1 p_partkey (type: int)
-            Local Work:
-              Map Reduce Local Work
-        Map 4
-            Map Operator Tree:
-                TableScan
                 alias: p1
                 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
                 Filter Operator
@@ -215,7 +198,7 @@
                        1 p_name (type: string), p_partkey (type: int)
                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20
                      input vertices:
-                        1 Map 3
+                        1 Map 2
                      Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE
                      Spark HashTable Sink Operator
                        condition expressions:
@@ -226,12 +209,29 @@
                          1 p_name (type: string)
            Local Work:
              Map Reduce Local Work
+        Map 4
+           Map Operator Tree:
+              TableScan
+                alias: p4
+                Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+                Filter Operator
+                  predicate: p_partkey is not null (type: boolean)
+                  Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+                  Spark HashTable Sink Operator
+                    condition expressions:
+                      0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col12} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} {_col24} {_col25} {_col26} {_col27} {_col28} {_col29} {_col30} {_col31} {_col32}
+                      1 {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment}
+                    keys:
+                      0 _col0 (type: int)
+                      1 p_partkey (type: int)
+           Local Work:
+             Map Reduce Local Work
 
   Stage: Stage-1
     Spark
 #### A masked pattern was here ####
       Vertices:
-        Map 2
+        Map 3
            Map Operator Tree:
               TableScan
                 alias: p3
@@ -250,7 +250,7 @@
                      1 p_name (type: string)
                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32
                    input vertices:
-                      0 Map 4
+                      0 Map 1
                    Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
                    Map Join Operator
                      condition map:
@@ -263,7 +263,7 @@
                        1 p_partkey (type: int)
                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44
                      input vertices:
-                        1 Map 1
+                        1 Map 4
                      Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE
                      Filter Operator
                        predicate: (((_col13 = _col25) and (_col0 = _col36)) and (_col0 = _col12)) (type: boolean)
diff --git a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual1.q.out b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual1.q.out
index 168921f..146cd28 100644
--- a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual1.q.out
+++ b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual1.q.out
@@ -70,38 +70,38 @@ STAGE PLANS:
     Spark
 #### A masked pattern was here ####
       Vertices:
-        Map 1
+        Map 2
            Map Operator Tree:
               TableScan
-                alias: p3
+                alias: p2
                 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                 Filter Operator
-                  predicate: p3_name is not null (type: boolean)
+                  predicate: p2_name is not null (type: boolean)
                   Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                   Spark HashTable Sink Operator
                     condition expressions:
                       0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment}
-                      1 {p2_partkey} {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment}
-                      2 {p3_partkey} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment}
+                      1 {p2_partkey} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment}
+                      2 {p3_partkey} {p3_name} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment}
                     keys:
                       0 p_name (type: string)
                       1 p2_name (type: string)
                       2 p3_name (type: string)
            Local Work:
              Map Reduce Local Work
-        Map 2
+        Map 3
            Map Operator Tree:
               TableScan
-                alias: p2
+                alias: p3
                 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                 Filter Operator
-                  predicate: p2_name is not null (type: boolean)
+                  predicate: p3_name is not null (type: boolean)
                   Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                   Spark HashTable Sink Operator
                     condition expressions:
                       0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment}
-                      1 {p2_partkey} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment}
-                      2 {p3_partkey} {p3_name} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment}
+                      1 {p2_partkey} {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment}
+                      2 {p3_partkey} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment}
                     keys:
                       0 p_name (type: string)
                       1 p2_name (type: string)
@@ -113,7 +113,7 @@ STAGE PLANS:
     Spark
 #### A masked pattern was here ####
       Vertices:
-        Map 3
+        Map 1
            Map Operator Tree:
               TableScan
                 alias: p1
@@ -136,7 +136,7 @@
                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32
                    input vertices:
                      1 Map 2
-                      2 Map 1
+                      2 Map 3
                    Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string)
@@ -174,38 +174,38 @@ STAGE PLANS:
     Spark
 #### A masked pattern was here ####
       Vertices:
-        Map 1
+        Map 2
            Map Operator Tree:
               TableScan
-                alias: p3
+                alias: p2
                 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                 Filter Operator
-                  predicate: p3_name is not null (type: boolean)
+                  predicate: p2_name is not null (type: boolean)
                   Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                   Spark HashTable Sink Operator
                     condition expressions:
                       0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment}
-                      1 {p2_partkey} {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment}
-                      2 {p3_partkey} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment}
+                      1 {p2_partkey} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment}
+                      2 {p3_partkey} {p3_name} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment}
                     keys:
                       0 p_name (type: string)
                       1 p2_name (type: string)
                       2 p3_name (type: string)
            Local Work:
              Map Reduce Local Work
-        Map 2
+        Map 3
            Map Operator Tree:
               TableScan
-                alias: p2
+                alias: p3
                 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                 Filter Operator
-                  predicate: p2_name is not null (type: boolean)
+                  predicate: p3_name is not null (type: boolean)
                   Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                   Spark HashTable Sink Operator
                     condition expressions:
                       0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment}
-                      1 {p2_partkey} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment}
-                      2 {p3_partkey} {p3_name} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment}
+                      1 {p2_partkey} {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment}
+                      2 {p3_partkey} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment}
                     keys:
                       0 p_name (type: string)
                       1 p2_name (type: string)
@@ -217,7 +217,7 @@ STAGE PLANS:
     Spark
 #### A masked pattern was here ####
       Vertices:
-        Map 3
+        Map 1
            Map Operator Tree:
               TableScan
                 alias: p1
@@ -240,7 +240,7 @@
                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32
                    input vertices:
                      1 Map 2
-                      2 Map 1
+                      2 Map 3
                    Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string)
@@ -278,38 +278,38 @@ STAGE PLANS:
     Spark
 #### A masked pattern was here ####
       Vertices:
-        Map 1
+        Map 2
            Map Operator Tree:
               TableScan
-                alias: p3
+                alias: p2
                 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                 Filter Operator
-                  predicate: p3_name is not null (type: boolean)
+                  predicate: p2_name is not null (type: boolean)
                   Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                   Spark HashTable Sink Operator
                     condition expressions:
-                      0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col12} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20}
-                      1 {p3_partkey} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment}
+                      0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment}
+                      1 {p2_partkey} {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment}
                     keys:
-                      0 _col13 (type: string)
-                      1 p3_name (type: string)
+                      0
+                      1
            Local Work:
              Map Reduce Local Work
-        Map 2
+        Map 3
            Map Operator Tree:
               TableScan
-                alias: p2
+                alias: p3
                 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                 Filter Operator
-                  predicate: p2_name is not null (type: boolean)
+                  predicate: p3_name is not null (type: boolean)
                   Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                   Spark HashTable Sink Operator
                     condition expressions:
-                      0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment}
-                      1 {p2_partkey} {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment}
+                      0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col12} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20}
+                      1 {p3_partkey} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment}
                     keys:
-                      0
-                      1
+                      0 _col13 (type: string)
+                      1 p3_name (type: string)
            Local Work:
              Map Reduce Local Work
@@ -317,7 +317,7 @@ STAGE PLANS:
     Spark
 #### A masked pattern was here ####
       Vertices:
-        Map 3
+        Map 1
            Map Operator Tree:
               TableScan
                 alias: p1
@@ -349,7 +349,7 @@
                        1 p3_name (type: string)
                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32
                      input vertices:
-                        1 Map 1
+                        1 Map 3
                      Statistics: Num rows: 7 Data size: 951 Basic stats: COMPLETE Column stats: NONE
                      Select Operator
                        expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string)
@@ -387,38 +387,38 @@ STAGE PLANS:
     Spark
 #### A masked pattern was here ####
       Vertices:
-        Map 1
+        Map 2
            Map Operator Tree:
               TableScan
-                alias: p3
+                alias: p2
                 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                 Filter Operator
-                  predicate: p3_name is not null (type: boolean)
+                  predicate: ((p2_partkey = 1) and p2_name is not null) (type: boolean)
                   Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                   Spark HashTable Sink Operator
                     condition expressions:
-                      0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20}
-                      1 {p3_partkey} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment}
+                      0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment}
+                      1 {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment}
                     keys:
-                      0 _col13 (type: string)
-                      1 p3_name (type: string)
+                      0
+                      1
            Local Work:
              Map Reduce Local Work
-        Map 2
+        Map 3
            Map Operator Tree:
               TableScan
-                alias: p2
+                alias: p3
                 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                 Filter Operator
-                  predicate: ((p2_partkey = 1) and p2_name is not null) (type: boolean)
+                  predicate: p3_name is not null (type: boolean)
                   Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                   Spark HashTable Sink Operator
                     condition expressions:
-                      0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment}
-                      1 {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment}
+                      0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20}
+                      1 {p3_partkey} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment}
                     keys:
-                      0
-                      1
+                      0 _col13 (type: string)
+                      1 p3_name (type: string)
            Local Work:
              Map Reduce Local Work
@@ -426,7 +426,7 @@ STAGE PLANS:
     Spark
 #### A masked pattern was here ####
       Vertices:
-        Map 3
+        Map 1
            Map Operator Tree:
               TableScan
                 alias: p1
@@ -455,7 +455,7 @@
                        1 p3_name (type: string)
                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32
                      input vertices:
-                        1 Map 1
+                        1 Map 3
                      Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE
                      Select Operator
                        expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), 1 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string)
diff --git a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual2.q.out b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual2.q.out
index 6bd2916..c7c0e28 100644
--- a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual2.q.out
+++ b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual2.q.out
@@ -70,20 +70,20 @@ STAGE PLANS:
     Spark
 #### A masked pattern was here ####
       Vertices:
-        Map 1
+        Map 2
            Map Operator Tree:
               TableScan
-                alias: p4
-                Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+                alias: p2
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                 Filter Operator
-                  predicate: p_name is not null (type: boolean)
-                  Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+                  predicate: p2_name is not null (type: boolean)
+                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                   Spark HashTable Sink Operator
                     condition expressions:
                       0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment}
-                      1 {p2_partkey} {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment}
+                      1 {p2_partkey} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment}
                       2 {p3_partkey} {p3_name} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment}
-                      3 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment}
+                      3 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment}
                     keys:
                       0 p_name (type: string)
                       1 p2_name (type: string)
@@ -91,7 +91,7 @@ STAGE PLANS:
                       3 p_name (type: string)
            Local Work:
              Map Reduce Local Work
-        Map 2
+        Map 3
            Map Operator Tree:
               TableScan
                 alias: p3
@@ -112,20 +112,20 @@ STAGE PLANS:
                       3 p_name (type: string)
            Local Work:
              Map Reduce Local Work
-        Map 3
+        Map 4
            Map Operator Tree:
               TableScan
-                alias: p2
-                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                alias: p4
+                Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
                 Filter Operator
-                  predicate: p2_name is not null (type: boolean)
-                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                  predicate: p_name is not null (type: boolean)
+                  Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
                   Spark HashTable Sink Operator
                     condition expressions:
                       0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment}
-                      1 {p2_partkey} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment}
+                      1 {p2_partkey} {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment}
                       2 {p3_partkey} {p3_name} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment}
-                      3 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment}
+                      3 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment}
                     keys:
                       0 p_name (type: string)
                       1 p2_name (type: string)
@@ -138,7 +138,7 @@ STAGE PLANS:
     Spark
 #### A masked pattern was here ####
       Vertices:
-        Map 4
+        Map 1
            Map Operator Tree:
               TableScan
                 alias: p1
@@ -163,9 +163,9 @@ STAGE PLANS:
                      3 p_name (type: string)
                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44
                    input vertices:
-                      1 Map 3
-                      2 Map 2
-                      3 Map 1
+                      1 Map 2
+                      2 Map 3
+                      3 Map 4
                    Statistics: Num rows: 42 Data size: 5190 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string), _col36 (type: int), _col37 (type: string), _col38 (type: string), _col39 (type: string), _col40 (type: string), _col41 (type: int), _col42 (type: string), _col43 (type: double), _col44 (type: string)
@@ -209,35 +209,35 @@ STAGE PLANS:
         Map 2
            Map Operator Tree:
               TableScan
-                alias: p3
+                alias: p2
                 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                 Filter Operator
-                  predicate: p3_name is not null (type: boolean)
+                  predicate: (p2_name is not null and p2_partkey is not null) (type: boolean)
                   Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                   Spark HashTable Sink Operator
                     condition expressions:
-                      0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col12} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20}
-                      1 {p3_partkey} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment}
+                      0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment}
+                      1 {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment}
                     keys:
-                      0 _col13 (type: string)
-                      1 p3_name (type: string)
+                      0 p_name (type: string), p_partkey (type: int)
+                      1 p2_name (type: string), p2_partkey (type: int)
            Local Work:
              Map Reduce Local Work
         Map 3
            Map Operator Tree:
               TableScan
-                alias: p2
+                alias: p3
                 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                 Filter Operator
-                  predicate: (p2_name is not null and p2_partkey is not null) (type: boolean)
+                  predicate: p3_name is not null (type: boolean)
                   Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                   Spark HashTable Sink Operator
                     condition expressions:
-                      0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment}
-                      1 {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment}
+                      0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col12} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20}
+                      1 {p3_partkey} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment}
                     keys:
-                      0 p_name (type: string), p_partkey (type: int)
-                      1 p2_name (type: string), p2_partkey (type: int)
+                      0 _col13 (type: string)
+                      1 p3_name (type: string)
            Local Work:
              Map Reduce Local Work
@@ -245,7 +245,7 @@ STAGE PLANS:
     Spark
 #### A masked pattern was here ####
       Vertices:
-        Map 4
+        Map 1
            Map Operator Tree:
               TableScan
                 alias: p1
@@ -264,7 +264,7 @@
                      1 p2_name (type: string), p2_partkey (type: int)
                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20
                    input vertices:
-                      1 Map 3
+                      1 Map 2
                    Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE
                    Map Join Operator
                      condition map:
@@ -277,7 +277,7 @@
                        1 p3_name (type: string)
                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32
                      input vertices:
-                        1 Map 2
+                        1 Map 3
                      Statistics: Num rows: 7 Data size: 1024 Basic stats: COMPLETE Column stats: NONE
                      Spark HashTable Sink Operator
                        condition expressions:
@@ -293,7 +293,7 @@ STAGE PLANS:
     Spark
 #### A masked pattern was here ####
       Vertices:
-        Map 1
+        Map 4
            Map Operator Tree:
               TableScan
                 alias: p4
@@ -312,7 +312,7 @@
                      1 p_partkey (type: int)
                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44
                    input vertices:
-                      0 Map 4
+                      0 Map 1
                    Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string), _col36 (type: int), _col37 (type: string), _col38 (type: string), _col39 (type: string), _col40 (type: string), _col41 (type: int), _col42 (type: string), _col43 (type: double), _col44 (type: string)
diff --git a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual3.q.out b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual3.q.out
index f6cd06b..4f48f51 100644
--- a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual3.q.out
+++ b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual3.q.out
@@ -72,38 +72,38 @@ STAGE PLANS:
     Spark
 #### A masked pattern was here ####
       Vertices:
-        Map 1
+        Map 2
            Map Operator Tree:
               TableScan
-                alias: p3
+                alias: p2
                 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                 Filter Operator
-                  predicate: p3_name is not null (type: boolean)
+                  predicate: p2_name is not null (type: boolean)
                   Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                   Spark HashTable Sink Operator
                     condition expressions:
                       0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment}
-                      1 {p2_partkey} {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment}
-                      2 {p3_partkey} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment}
+                      1 {p2_partkey} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment}
+                      2 {p3_partkey} {p3_name} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment}
                     keys:
                       0 p_name (type: string)
                       1 p2_name (type: string)
                       2 p3_name (type: string)
            Local Work:
              Map Reduce Local Work
-        Map 2
+        Map 3
            Map Operator Tree:
               TableScan
-                alias: p2
+                alias: p3
                 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                 Filter Operator
-                  predicate: p2_name is not null (type: boolean)
+                  predicate: p3_name is not null (type: boolean)
                   Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                   Spark HashTable Sink Operator
                     condition expressions:
                       0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment}
-                      1 {p2_partkey} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment}
-                      2 {p3_partkey} {p3_name} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment}
+                      1 {p2_partkey} {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment}
+                      2 {p3_partkey} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment}
                     keys:
                       0 p_name (type: string)
                       1 p2_name (type: string)
@@ -115,7 +115,7 @@ STAGE PLANS:
     Spark
 #### A masked pattern was here ####
       Vertices:
-        Map 3
+        Map 1
            Map Operator Tree:
               TableScan
                 alias: p1
@@ -138,7 +138,7 @@
                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32
                    input vertices:
                      1 Map 2
-                      2 Map 1
+                      2 Map 3
                    Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE
                    Filter Operator
                      predicate: ((_col1 = _col13) and (_col13 = _col25)) (type: boolean)
@@ -181,38 +181,38 @@ STAGE PLANS:
     Spark
 #### A masked pattern was here ####
       Vertices:
-        Map 1
+        Map 2
            Map Operator Tree:
               TableScan
-                alias: p3
+                alias: p2
                 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                 Filter Operator
-                  predicate: p3_name is not null (type: boolean)
+                  predicate: p2_name is not null (type: boolean)
                   Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                   Spark HashTable Sink Operator
                     condition expressions:
                       0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment}
-                      1 {p2_partkey} {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment}
-                      2 {p3_partkey} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment}
+                      1 {p2_partkey} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment}
+                      2 {p3_partkey} {p3_name} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment}
                     keys:
                       0 p_name (type: string)
                       1 p2_name (type: string)
                       2 p3_name (type: string)
            Local Work:
              Map Reduce Local Work
-        Map 2
+        Map 3
            Map Operator Tree:
               TableScan
-                alias: p2
+                alias: p3
                 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                 Filter Operator
-                  predicate: p2_name is not null (type: boolean)
+                  predicate: p3_name is not null (type: boolean)
                   Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                   Spark HashTable Sink Operator
                     condition expressions:
                       0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment}
-                      1 {p2_partkey} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment}
-                      2 {p3_partkey} {p3_name} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment}
+                      1 {p2_partkey} {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment}
+                      2 {p3_partkey} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment}
                     keys:
                       0 p_name (type: string)
                       1 p2_name (type: string)
@@ -224,7 +224,7 @@ STAGE PLANS:
     Spark
 #### A masked pattern was here ####
       Vertices:
-        Map 3
+        Map 1
            Map Operator Tree:
               TableScan
                 alias: p1
@@ -247,7 +247,7 @@
                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32
                    input vertices:
                      1 Map 2
-                      2 Map 1
+                      2 Map 3
                    Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE
                    Filter Operator
                      predicate: ((_col13 = _col1) and (_col25 = _col13)) (type: boolean)
@@ -290,38 +290,38 @@ STAGE PLANS:
     Spark
 #### A masked pattern was here ####
       Vertices:
-        Map 1
+        Map 2
            Map Operator Tree:
               TableScan
-                alias: p3
+                alias: p2
                 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                 Filter Operator
-                  predicate: p3_name is not null (type: boolean)
+                  predicate: p2_name is not null (type: boolean)
                   Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                   Spark HashTable Sink Operator
                     condition expressions:
-                      0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col12} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20}
-                      1 {p3_partkey} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment}
+                      0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment}
+                      1 {p2_partkey} {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment}
                     keys:
-                      0 _col13 (type: string)
-                      1 p3_name (type: string)
+                      0
+                      1
            Local Work:
              Map Reduce Local Work
-        Map 2
+        Map 3
            Map Operator Tree:
               TableScan
-                alias: p2
+                alias: p3
                 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                 Filter Operator
-                  predicate: p2_name is not null (type: boolean)
+                  predicate: p3_name is not null (type: boolean)
                   Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                   Spark HashTable Sink Operator
                     condition expressions:
-                      0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment}
-                      1 {p2_partkey} {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment}
+                      0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col12} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20}
+                      1 {p3_partkey} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment}
                     keys:
-                      0
-                      1
+                      0 _col13 (type: string)
+                      1 p3_name (type: string)
            Local Work:
              Map Reduce Local Work
@@ -329,7 +329,7 @@ STAGE PLANS:
     Spark
 #### A masked pattern was here ####
       Vertices:
-        Map 3
+        Map 1
            Map Operator Tree:
               TableScan
                 alias: p1
@@ -361,7 +361,7 @@
                        1 p3_name (type: string)
                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32
                      input vertices:
-                        1 Map 1
+                        1 Map 3
                      Statistics: Num rows: 7 Data size: 951 Basic stats: COMPLETE Column stats: NONE
                      Filter Operator
                        predicate: (((_col12 + _col0) = _col0) and (_col25 = _col13)) (type: boolean)
@@ -404,38 +404,38 @@ STAGE PLANS:
     Spark
 #### A masked pattern was here ####
       Vertices:
-        Map 1
+        Map 2
            Map Operator Tree:
               TableScan
-                alias: p3
+                alias: p2
                 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                 Filter Operator
-                  predicate: p3_name is not null (type: boolean)
+                  predicate: (p2_name is not null and (p2_partkey = 1)) (type: boolean)
                   Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                   Spark HashTable Sink Operator
                     condition expressions:
-                      0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20}
-                      1 {p3_partkey} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment}
+                      0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment}
+                      1 {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment}
                     keys:
-                      0 _col13 (type: string)
-                      1 p3_name (type: string)
+                      0
+                      1
            Local Work:
              Map Reduce Local Work
-        Map 2
+        Map 3
            Map Operator Tree:
               TableScan
-                alias: p2
+                alias: p3
                 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                 Filter Operator
-                  predicate: (p2_name is not null and (p2_partkey = 1)) (type: boolean)
+                  predicate: p3_name is not null (type: boolean)
                   Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                   Spark HashTable Sink Operator
                     condition expressions:
-                      0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment}
-                      1 {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment}
+                      0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20}
+                      1 {p3_partkey} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment}
                     keys:
-                      0
-                      1
+                      0 _col13 (type: string)
+                      1 p3_name (type: string)
            Local Work:
              Map Reduce Local Work
@@ -443,7 +443,7 @@ STAGE PLANS:
     Spark
 #### A masked pattern was here ####
       Vertices:
-        Map 3
+        Map 1
            Map Operator Tree:
               TableScan
                 alias: p1
@@ -472,7 +472,7 @@
                        1 p3_name (type: string)
                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32
                      input vertices:
-                        1 Map 1
+                        1 Map 3
                      Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE
                      Filter Operator
                        predicate: (_col25 = _col13) (type: boolean)
diff --git a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual4.q.out b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual4.q.out
index e75599c..a0f158a 100644
--- a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual4.q.out
+++ b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual4.q.out
@@ -72,20 +72,20 @@ STAGE PLANS:
     Spark
 #### A masked pattern was here ####
       Vertices:
-        Map 1
+        Map 2
            Map Operator Tree:
               TableScan
-                alias: p4
-                Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+                alias: p2
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                 Filter Operator
-                  predicate: p_name is not null (type: boolean)
-                  Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+                  predicate: p2_name is not null (type: boolean)
+                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                   Spark HashTable Sink Operator
                     condition expressions:
                       0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment}
-                      1 {p2_partkey} {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment}
+                      1 {p2_partkey} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment}
                       2 {p3_partkey} {p3_name} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment}
-                      3 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment}
+                      3 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment}
                     keys:
                       0 p_name (type: string)
                       1 p2_name (type: string)
@@ -93,7 +93,7 @@ STAGE PLANS:
                       3 p_name (type: string)
            Local Work:
              Map Reduce Local Work
-        Map 2
+        Map 3
            Map Operator Tree:
               TableScan
                 alias: p3
@@ -114,20 +114,20 @@ STAGE PLANS:
                       3 p_name (type: string)
            Local Work:
              Map Reduce Local Work
-        Map 3
+        Map 4
            Map Operator Tree:
               TableScan
-                alias: p2
-                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                alias: p4
+                Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
                 Filter Operator
-                  predicate: p2_name is not null (type: boolean)
-                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                  predicate: p_name is not null (type: boolean)
+                  Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
                   Spark HashTable Sink Operator
                     condition expressions:
                       0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment}
-                      1 {p2_partkey} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment}
+                      1 {p2_partkey} {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment}
                       2 {p3_partkey} {p3_name} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment}
-                      3 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment}
+                      3 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment}
                     keys:
                       0 p_name (type: string)
                       1 p2_name (type: string)
@@ -140,7 +140,7 @@ STAGE PLANS:
     Spark
 #### A masked pattern was here ####
       Vertices:
-        Map 4
+        Map 1
            Map Operator Tree:
               TableScan
                 alias: p1
@@ -165,9 +165,9 @@ STAGE PLANS:
                      3 p_name (type: string)
                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44
                    input vertices:
-                      1 Map 3
-                      2 Map 2
-                      3 Map 1
+                      1 Map 2
+                      2 Map 3
+                      3 Map 4
                    Statistics: Num rows: 42 Data size: 5190 Basic stats: COMPLETE Column stats: NONE
                    Filter Operator
                      predicate: ((_col13 = _col25) and (_col1 = _col37)) (type: boolean)
@@ -216,35 +216,35 @@ STAGE PLANS:
         Map 2
            Map Operator Tree:
               TableScan
-                alias: p3
+                alias: p2
                 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                 Filter Operator
-                  predicate: p3_name is not null (type: boolean)
+                  predicate: (p2_name is not null and p2_partkey is not null) (type: boolean)
                   Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                   Spark HashTable Sink Operator
                     condition expressions:
-                      0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col12} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20}
-                      1 {p3_partkey} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment}
+                      0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment}
+                      1 {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment}
                     keys:
-                      0 _col13 (type: string)
-                      1 p3_name (type: string)
+                      0 p_name (type: string), p_partkey (type: int)
+                      1 p2_name (type: string), p2_partkey (type: int)
            Local Work:
              Map Reduce Local Work
         Map 3
            Map Operator Tree:
               TableScan
-                alias: p2
+                alias: p3
                 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                 Filter Operator
-                  predicate: (p2_name is not null and p2_partkey is not null) (type: boolean)
+                  predicate: p3_name is not null (type: boolean)
                   Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                   Spark HashTable Sink Operator
                     condition expressions:
-                      0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment}
-                      1 {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment}
+                      0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col12} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20}
+                      1 {p3_partkey} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment}
                     keys:
-                      0 p_name (type: string), p_partkey (type: int)
-                      1 p2_name (type: string), p2_partkey (type: int)
+                      0 _col13 (type: string)
+                      1 p3_name (type: string)
            Local Work:
              Map Reduce Local Work
@@ -252,7 +252,7 @@ STAGE PLANS:
     Spark
 #### A masked pattern was here ####
       Vertices:
-        Map 4
+        Map 1
            Map Operator Tree:
               TableScan
                 alias: p1
@@ -271,7 +271,7 @@
                      1 p2_name (type: string), p2_partkey (type: int)
                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20
                    input vertices:
-                      1 Map 3
+                      1 Map 2
                    Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE
                    Map Join Operator
                      condition map:
@@ -284,7 +284,7 @@
                        1 p3_name (type: string)
                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32
                      input vertices:
-                        1 Map 2
+                        1 Map 3
                      Statistics: Num rows: 7 Data size: 1024 Basic stats: COMPLETE Column stats: NONE
                      Spark HashTable Sink Operator
                        condition expressions:
@@ -300,7 +300,7 @@ STAGE PLANS:
     Spark
 #### A masked pattern was here ####
       Vertices:
-        Map 1
+        Map 4
            Map Operator Tree:
               TableScan
                 alias: p4
@@ -319,7 +319,7 @@
                      1 p_partkey (type: int)
                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44
                    input vertices:
-                      0 Map 4
+                      0 Map 1
                    Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
                    Filter Operator
                      predicate: (((_col13 = _col25) and (_col0 = _col36)) and (_col0 = _col12)) (type: boolean)
diff --git a/ql/src/test/results/clientpositive/spark/join_filters_overlap.q.out b/ql/src/test/results/clientpositive/spark/join_filters_overlap.q.out
index 1a6f157..87bfb89 100644
--- a/ql/src/test/results/clientpositive/spark/join_filters_overlap.q.out
+++ b/ql/src/test/results/clientpositive/spark/join_filters_overlap.q.out
@@ -102,7 +102,7 @@ STAGE PLANS:
     Spark
 #### A masked pattern was here ####
       Vertices:
-        Map 1
+        Map 2
            Map Operator Tree:
               TableScan
                 alias: b
@@ -179,7 +179,7 @@ STAGE PLANS:
                 name: default.a
            Truncated Path -> Alias:
              /a [b]
-        Map 2
+        Map 3
            Map Operator Tree:
               TableScan
                 alias: c
@@ -261,7 +261,7 @@ STAGE PLANS:
     Spark
 #### A masked pattern was here ####
       Vertices:
-        Map 3
+        Map 1
            Map Operator Tree:
               TableScan
                 alias: a
@@ -287,8 +287,8 @@ STAGE PLANS:
                      2 key (type: int)
                    outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11
                    input vertices:
-                      1 Map 1
-                      2 Map 2
+                      1 Map 2
+                      2 Map 3
                    Position of Big Table: 0
                    Statistics: Num rows: 6 Data size: 39 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
@@ -484,21 +484,21 @@ STAGE PLANS:
     Spark
 #### A masked pattern was here ####
       Vertices:
-        Map 2
+        Map 1
            Map Operator Tree:
               TableScan
-                alias: c
+                alias: a
                 Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE
                 GatherStats: false
                 Filter Operator
                   isSamplingPred: false
-                  predicate: (value = 60) (type: boolean)
+                  predicate: (value = 50) (type: boolean)
                   Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
                   Spark HashTable Sink Operator
                     condition expressions:
-                      0 {key} {value}
+                      0 {value}
                       1 {key} {value}
-                      2 {value}
+                      2 {key} {value}
                    filter mappings:
                      1 [0, 1, 2, 1]
                    filter predicates:
@@ -560,22 +560,22 @@ STAGE PLANS:
                 name: default.a
                 name: default.a
            Truncated Path -> Alias:
-              /a [c]
+              /a [a]
         Map 3
            Map Operator Tree:
               TableScan
-                alias: a
+                alias: c
                 Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE
                 GatherStats: false
                 Filter Operator
                   isSamplingPred: false
-                  predicate: (value = 50) (type: boolean)
+                  predicate: (value = 60) (type: boolean)
                   Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
                   Spark HashTable Sink Operator
                     condition expressions:
-                      0 {value}
+                      0 {key} {value}
                       1 {key} {value}
-                      2 {key} {value}
+                      2 {value}
                    filter mappings:
                      1 [0, 1, 2, 1]
                    filter predicates:
@@ -637,13 +637,13 @@ STAGE PLANS:
                 name: default.a
                 name: default.a
            Truncated Path -> Alias:
-              /a [a]
+              /a [c]
 
   Stage: Stage-1
     Spark
 #### A masked pattern was here ####
       Vertices:
-        Map 1
+        Map 2
            Map Operator Tree:
               TableScan
                 alias: b
@@ -669,8 +669,8 @@ STAGE PLANS:
                      2 key (type: int)
                    outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11
                    input vertices:
-                      0 Map 3
-                      2 Map 2
+                      0 Map 1
+                      2 Map 3
                    Position of Big Table: 1
                    Statistics: Num rows: 6 Data size: 39 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
@@ -880,21 +880,21 @@ STAGE PLANS:
     Spark
 #### A masked pattern was here ####
       Vertices:
-        Map 2
+        Map 1
            Map Operator Tree:
               TableScan
-                alias: c
+                alias: a
                 Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE
                 GatherStats: false
                 Filter Operator
                   isSamplingPred: false
-                  predicate: (value = 60) (type: boolean)
+                  predicate: (value = 50) (type: boolean)
                   Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
                   Spark HashTable Sink Operator
                     condition expressions:
-                      0 {key} {value}
+                      0 {value}
                       1 {key} {value}
-                      2 {value}
+                      2 {key} {value}
                    filter mappings:
                      1 [0, 2, 2, 2]
                    filter predicates:
@@ -956,22 +956,22 @@ STAGE PLANS:
                 name: default.a
                 name: default.a
            Truncated Path -> Alias:
-              /a [c]
+              /a [a]
         Map 3
            Map Operator Tree:
               TableScan
-                alias: a
+                alias: c
                 Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE
                 GatherStats: false
                 Filter Operator
                   isSamplingPred: false
-                  predicate: (value = 50) (type: boolean)
+                  predicate: (value = 60) (type: boolean)
                   Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
                   Spark HashTable Sink Operator
                     condition expressions:
-                      0 {value}
+                      0 {key} {value}
                       1 {key} {value}
-                      2 {key} {value}
+                      2 {value}
                    filter mappings:
                      1 [0, 2, 2, 2]
                    filter predicates:
@@ -1033,13 +1033,13 @@ STAGE PLANS:
                 name: default.a
                 name: default.a
            Truncated Path -> Alias:
-              /a [a]
+              /a [c]
 
   Stage: Stage-1
     Spark
 #### A masked pattern was here ####
       Vertices:
-        Map 1
+        Map 2
            Map Operator Tree:
               TableScan
                 alias: b
@@ -1065,8 +1065,8 @@ STAGE PLANS:
                      2 key (type: int)
                    outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11
                    input vertices:
-                      0 Map 3
-                      2 Map 2
+                      0 Map 1
+                      2 Map 3
                    Position of Big Table: 1
                    Statistics: Num rows: 6 Data size: 39 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
@@ -1294,21 +1294,17 @@ STAGE PLANS:
         Map 1
            Map Operator Tree:
               TableScan
-                alias: d
+                alias: a
                 Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE
                 GatherStats: false
-                Filter Operator
-                  isSamplingPred: false
-                  predicate: (value = 40) (type: boolean)
-                  Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: key (type: int)
-                    sort order: +
-                    Map-reduce partition columns: key (type: int)
-                    Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
-                    tag: 3
-                    value expressions: value (type: int)
-                    auto parallelism: false
+                Reduce Output Operator
+                  key expressions: key (type: int)
+                  sort order: +
+                  Map-reduce partition columns: key (type: int)
+                  Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+                  tag: 0
+                  value expressions: value (type: int)
+                  auto parallelism: false
            Path -> Alias:
 #### A masked pattern was here ####
            Path -> Partition:
@@ -1357,7 +1353,7 @@ STAGE PLANS:
                 name: default.a
                 name: default.a
            Truncated Path -> Alias:
-              /a [d]
+              /a [a]
         Map 3
            Map Operator Tree:
               TableScan
@@ -1491,17 +1487,21 @@ STAGE PLANS:
         Map 5
            Map Operator Tree:
               TableScan
-                alias: a
+                alias: d
                 Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE
                 GatherStats: false
-                Reduce Output Operator
-                  key expressions: key (type: int)
-                  sort order: +
-                  Map-reduce partition columns: key (type: int)
-                  Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE
-                  tag: 0
-                  value expressions: value (type: int)
-                  auto parallelism: false
+                Filter Operator
+                  isSamplingPred: false
+                  predicate: (value = 40) (type: boolean)
+                  Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: key (type: int)
+                    sort order: +
+                    Map-reduce partition columns: key (type: int)
+                    Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                    tag: 3
+                    value expressions: value (type: int)
+                    auto parallelism: false
            Path -> Alias:
 #### A masked pattern was here ####
            Path -> Partition:
@@ -1550,7 +1550,7 @@ STAGE PLANS:
                 name: default.a
                 name: default.a
            Truncated Path -> Alias:
-              /a [a]
+              /a [d]
         Reducer 2
            Needs Tagging: true
            Reduce Operator Tree:
@@ -1735,22 +1735,22 @@ STAGE PLANS:
     Spark
 #### A masked pattern was here ####
       Vertices:
-        Map 1
+        Map 2
            Map Operator Tree:
               TableScan
-                alias: d
+                alias: b
                 Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE
                 GatherStats: false
                 Filter Operator
                   isSamplingPred: false
-                  predicate: (value = 40) (type: boolean)
+                  predicate: (value = 50) (type: boolean)
                   Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
                   Spark HashTable Sink Operator
                     condition expressions:
                       0 {key} {value}
-                      1 {key} {value}
+                      1 {value}
                       2 {key} {value}
-                      3 {value}
+                      3 {key} {value}
                    filter mappings:
                      0 [1, 1, 2, 1, 3, 1]
                    filter predicates:
@@ -1814,22 +1814,22 @@ STAGE PLANS:
                 name: default.a
                 name: default.a
            Truncated Path -> Alias:
-              /a [d]
-        Map 2
+              /a [b]
+        Map 3
            Map Operator Tree:
               TableScan
-                alias: b
+                alias: c
                 Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE
                 GatherStats: false
                 Filter Operator
                   isSamplingPred: false
-                  predicate: (value = 50) (type: boolean)
+                  predicate: (value = 60) (type: boolean)
                   Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
                   Spark HashTable Sink Operator
                     condition expressions:
                       0 {key} {value}
-                      1 {value}
-                      2 {key} {value}
+                      1 {key} {value}
+                      2 {value}
                       3 {key} {value}
                    filter mappings:
                      0 [1, 1, 2, 1, 3, 1]
@@ -1894,23 +1894,23 @@ STAGE PLANS:
                 name: default.a
                 name: default.a
            Truncated Path -> Alias:
-              /a [b]
-        Map 3
+              /a [c]
+        Map 4
            Map Operator Tree:
               TableScan
-                alias: c
+                alias: d
                 Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE
                 GatherStats: false
                 Filter Operator
                   isSamplingPred: false
-                  predicate: (value = 60) (type: boolean)
+                  predicate: (value = 40) (type: boolean)
                   Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
                   Spark HashTable Sink Operator
                     condition expressions:
                       0 {key} {value}
                       1 {key} {value}
-                      2 {value}
-                      3 {key} {value}
+                      2 {key} {value}
+                      3 {value}
                    filter mappings:
                      0 [1, 1, 2, 1, 3, 1]
                    filter predicates:
@@ -1974,13 +1974,13 @@ STAGE PLANS:
                 name: default.a
                 name: default.a
            Truncated Path -> Alias:
-              /a [c]
+              /a [d]
 
   Stage: Stage-1
     Spark
 #### A masked pattern was here ####
       Vertices:
-        Map 4
+        Map 1
            Map Operator Tree:
               TableScan
                 alias: a
@@ -2012,7 +2012,7 @@ STAGE PLANS:
                    input vertices:
                      1 Map 2
                      2 Map 3
-                      3 Map 1
+                      3 Map 4
                    Position of Big Table: 0
                    Statistics: Num rows: 9 Data size: 59 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
diff --git a/ql/src/test/results/clientpositive/spark/join_hive_626.q.out b/ql/src/test/results/clientpositive/spark/join_hive_626.q.out
index 076cfe7..7fa0894 100644
--- a/ql/src/test/results/clientpositive/spark/join_hive_626.q.out
+++ b/ql/src/test/results/clientpositive/spark/join_hive_626.q.out
@@ -77,35 +77,35 @@ STAGE PLANS:
         Map 2
            Map Operator Tree:
               TableScan
-                alias: hive_count
-                Statistics: Num rows: 0 Data size: 5 Basic stats: PARTIAL Column stats: NONE
+                alias: hive_bar
+                Statistics: Num rows: 0 Data size: 23 Basic stats: PARTIAL Column stats: NONE
                 Filter Operator
-                  predicate: bar_id is not null (type: boolean)
+                  predicate: (foo_id is not null and bar_id is not null) (type: boolean)
                   Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                   Spark HashTable Sink Operator
                     condition expressions:
-                      0 {_col1} {_col13}
-                      1 {n}
+                      0 {foo_name}
+                      1 {bar_id} {bar_name}
                     keys:
-                      0 _col9 (type: int)
-                      1 bar_id (type: int)
+                      0 foo_id (type: int)
+                      1 foo_id (type: int)
            Local Work:
              Map Reduce Local Work
         Map 3
            Map Operator Tree:
               TableScan
-                alias: hive_bar
-                Statistics: Num rows: 0 Data size: 23 Basic stats: PARTIAL Column stats: NONE
+                alias: hive_count
+                Statistics: Num rows: 0 Data size: 5 Basic stats: PARTIAL Column stats: NONE
                 Filter Operator
-                  predicate: (foo_id is not null and bar_id is not null) (type: boolean)
+                  predicate: bar_id is not null (type: boolean)
                   Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                   Spark HashTable Sink Operator
                     condition expressions:
-                      0 {foo_name}
-                      1 {bar_id} {bar_name}
+                      0 {_col1} {_col13}
+                      1 {n}
                     keys:
-                      0 foo_id (type: int)
-                      1 foo_id (type: int)
+                      0 _col9 (type: int)
+                      1 bar_id (type: int)
            Local Work:
              Map Reduce Local Work
@@ -132,7 +132,7 @@ STAGE PLANS:
                      1 foo_id (type: int)
                    outputColumnNames: _col1, _col9, _col13
                    input vertices:
-                      1 Map 3
+                      1 Map 2
                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                    Map Join Operator
                      condition map:
@@ -145,7 +145,7 @@
                        1 bar_id (type: int)
                      outputColumnNames: _col1, _col13, _col22
                      input vertices:
-                        1 Map 2
+                        1 Map 3
                      Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                      Select Operator
                        expressions: _col1 (type: string), _col13 (type: string), _col22 (type: int)
diff --git a/ql/src/test/results/clientpositive/spark/join_map_ppr.q.out b/ql/src/test/results/clientpositive/spark/join_map_ppr.q.out
index 550cbb7..8f500be 100644
--- a/ql/src/test/results/clientpositive/spark/join_map_ppr.q.out
+++ b/ql/src/test/results/clientpositive/spark/join_map_ppr.q.out
@@ -117,16 +117,16 @@ STAGE PLANS:
         Map 1
            Map Operator Tree:
               TableScan
-                alias: z
-                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                alias: x
+                Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
                 GatherStats: false
                 Filter Operator
                   isSamplingPred: false
                   predicate: key is not null (type: boolean)
-                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
                   Spark HashTable Sink Operator
                     condition expressions:
-                      0 {key}
+                      0
                       1 {value}
                       2 {value}
                     keys:
@@ -141,12 +141,9 @@ STAGE PLANS:
            Path -> Partition:
 #### A masked pattern was here ####
               Partition
-                base file name: hr=11
+                base file name: src1
                 input format: org.apache.hadoop.mapred.TextInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                partition values:
-                  ds 2008-04-08
-                  hr 11
                 properties:
                   COLUMN_STATS_ACCURATE true
                   bucket_count -1
@@ -154,52 +151,53 @@ STAGE PLANS:
                   columns.comments defaultdefault
                   columns.types string:string
 #### A masked pattern was here ####
-                  name default.srcpart
+                  name default.src1
                   numFiles 1
-                  numRows 500
-                  partition_columns ds/hr
-                  partition_columns.types string:string
-                  rawDataSize 5312
-                  serialization.ddl struct srcpart { string key, string value}
+                  numRows 25
+                  rawDataSize 191
+                  serialization.ddl struct src1 { string key, string value}
                   serialization.format 1
                   serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                  totalSize 5812
+                  totalSize 216
 #### A masked pattern was here ####
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   input format: org.apache.hadoop.mapred.TextInputFormat
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   properties:
+                    COLUMN_STATS_ACCURATE true
                     bucket_count -1
                     columns key,value
                     columns.comments defaultdefault
                     columns.types string:string
 #### A masked pattern was here ####
-                    name default.srcpart
-                    partition_columns ds/hr
-                    partition_columns.types string:string
-                    serialization.ddl struct srcpart { string key, string value}
+                    name default.src1
+                    numFiles 1
+                    numRows 25
+                    rawDataSize 191
+                    serialization.ddl struct src1 { string key, string value}
                     serialization.format 1
                     serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    totalSize 216
 #### A masked pattern was here ####
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                  name: default.srcpart
-                  name: default.srcpart
+                  name: default.src1
+                  name: default.src1
            Truncated Path -> Alias:
-              /srcpart/ds=2008-04-08/hr=11 [z]
+              /src1 [x]
         Map 3
            Map Operator Tree:
               TableScan
-                alias: x
-                Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+                alias: z
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                 GatherStats: false
                 Filter Operator
                   isSamplingPred: false
                   predicate: key is not null (type: boolean)
-                  Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                   Spark HashTable Sink Operator
                     condition expressions:
-                      0
+                      0 {key}
                       1 {value}
                       2 {value}
                     keys:
@@ -214,9 +212,12 @@ STAGE PLANS:
            Path -> Partition:
 #### A masked pattern was here ####
               Partition
-                base file name: src1
+                base file name: hr=11
                 input format: org.apache.hadoop.mapred.TextInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                partition values:
+                  ds 2008-04-08
+                  hr 11
                 properties:
                   COLUMN_STATS_ACCURATE true
                   bucket_count -1
@@ -224,40 +225,39 @@ STAGE PLANS:
                   columns.comments defaultdefault
                   columns.types string:string
 #### A masked pattern was here ####
-                  name default.src1
+                  name default.srcpart
                   numFiles 1
-                  numRows 25
-                  rawDataSize 191
-                  serialization.ddl struct src1 { string key, string value}
+                  numRows 500
+                  partition_columns ds/hr
+                  partition_columns.types string:string
+                  rawDataSize 5312
+                  serialization.ddl struct srcpart { string key, string value}
                   serialization.format 1
                   serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                  totalSize 216
+                  totalSize 5812
 #### A masked pattern was here ####
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   input format: org.apache.hadoop.mapred.TextInputFormat
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   properties:
-                    COLUMN_STATS_ACCURATE true
                     bucket_count -1
                     columns key,value
                     columns.comments defaultdefault
                     columns.types string:string
 #### A masked pattern was here ####
-                    name default.src1
-                    numFiles 1
-                    numRows 25
-                    rawDataSize 191
-                    serialization.ddl struct src1 { string key, string value}
+                    name default.srcpart
+                    partition_columns ds/hr
+                    partition_columns.types string:string
+                    serialization.ddl struct srcpart { string key, string value}
                     serialization.format 1
                     serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    totalSize 216
 #### A masked pattern was here ####
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                  name: default.src1
-                  name: default.src1
+                  name: default.srcpart
+                  name: default.srcpart
            Truncated Path -> Alias:
-              /src1 [x]
+              /srcpart/ds=2008-04-08/hr=11 [z]
 
   Stage: Stage-1
     Spark
@@ -287,8 +287,8 @@ STAGE PLANS:
                      2 key (type: string)
                    outputColumnNames: _col0, _col6, _col11
                    input vertices:
-                      0 Map 3
-                      2 Map 1
+                      0 Map 1
+                      2 Map 3
                    Position of Big Table: 1
                    Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
@@ -682,13 +682,13 @@ STAGE PLANS:
         Map 1
            Map Operator Tree:
               TableScan
-                alias: z
-                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                alias: x
+                Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
                 GatherStats: false
                 Filter Operator
                   isSamplingPred: false
                   predicate: UDFToDouble(key) is not null (type: boolean)
-                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
                   Spark HashTable Sink Operator
                     condition expressions:
                       0 {key}
@@ -706,62 +706,60 @@ STAGE PLANS:
            Path -> Partition:
 #### A masked pattern was here ####
               Partition
-                base file name: hr=11
+                base file name: src1_copy
                 input format: org.apache.hadoop.mapred.TextInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                partition values:
-                  ds 2008-04-08
-                  hr 11
                 properties:
                   COLUMN_STATS_ACCURATE true
                   bucket_count -1
                   columns key,value
-                  columns.comments defaultdefault
+                  columns.comments
                   columns.types string:string
 #### A masked pattern was here ####
-                  name default.srcpart
+                  name default.src1_copy
                   numFiles 1
-                  numRows 500
-                  partition_columns ds/hr
-                  partition_columns.types string:string
-                  rawDataSize 5312
-                  serialization.ddl struct srcpart { string key, string value}
+                  numRows 25
+                  rawDataSize 191
+                  serialization.ddl struct src1_copy { string key, string value}
                   serialization.format 1
                   serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                  totalSize 5812
+                  totalSize 216
 #### A masked pattern was here ####
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   input format: org.apache.hadoop.mapred.TextInputFormat
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   properties:
+                    COLUMN_STATS_ACCURATE true
                     bucket_count -1
                     columns key,value
-                    columns.comments defaultdefault
+                    columns.comments
                     columns.types string:string
 #### A masked pattern was here ####
-                    name default.srcpart
-                    partition_columns ds/hr
-                    partition_columns.types string:string
-                    serialization.ddl struct srcpart { string key, string value}
+                    name default.src1_copy
+                    numFiles 1
+                    numRows 25
+                    rawDataSize 191
+                    serialization.ddl struct src1_copy { string key, string value}
                     serialization.format 1
                     serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    totalSize 216
 #### A masked pattern was here ####
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                  name: default.srcpart
-                  name: default.srcpart
+                  name: default.src1_copy
+                  name: default.src1_copy
            Truncated Path -> Alias:
-              /srcpart/ds=2008-04-08/hr=11 [z]
+              /src1_copy [x]
         Map 3
            Map Operator Tree:
               TableScan
-                alias: x
-                Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+                alias: z
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                 GatherStats: false
                 Filter Operator
                   isSamplingPred: false
                   predicate: UDFToDouble(key) is not null (type: boolean)
-                  Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                   Spark HashTable Sink Operator
                     condition expressions:
                       0 {key}
@@ -779,50 +777,52 @@ STAGE PLANS:
            Path -> Partition:
 #### A masked pattern was here ####
               Partition
-                base file name: src1_copy
+                base file name: hr=11
                 input format: org.apache.hadoop.mapred.TextInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                partition values:
+                  ds 2008-04-08
+                  hr 11
                 properties:
                   COLUMN_STATS_ACCURATE true
                   bucket_count -1
                   columns key,value
-                  columns.comments
+                  columns.comments defaultdefault
                   columns.types string:string
 #### A masked pattern was here ####
-                  name default.src1_copy
+                  name default.srcpart
                   numFiles 1
-                  numRows 25
-                  rawDataSize 191
-                  serialization.ddl struct src1_copy { string key, string value}
+                  numRows 500
+                  partition_columns ds/hr
+                  partition_columns.types string:string
+                  rawDataSize 5312
+                  serialization.ddl struct srcpart { string key, string value}
                   serialization.format 1
                   serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                  totalSize 216
+                  totalSize 5812
 #### A masked pattern was here ####
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   input format: org.apache.hadoop.mapred.TextInputFormat
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   properties:
-                    COLUMN_STATS_ACCURATE true
                     bucket_count -1
                     columns key,value
-                    columns.comments
+                    columns.comments defaultdefault
                     columns.types string:string
 #### A masked pattern was here ####
-                    name default.src1_copy
-                    numFiles 1
-                    numRows 25
-                    rawDataSize 191
-                    serialization.ddl struct src1_copy { string key, string value}
+                    name default.srcpart
+                    partition_columns ds/hr
+                    partition_columns.types string:string
+                    serialization.ddl struct srcpart { string key, string value}
                     serialization.format 1
                     serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    totalSize 216
 #### A masked pattern was here ####
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                  name: default.src1_copy
-                  name: default.src1_copy
+                  name: default.srcpart
+                  name: default.srcpart
            Truncated Path -> Alias:
-              /src1_copy [x]
+              /srcpart/ds=2008-04-08/hr=11 [z]
 
   Stage: Stage-1
     Spark
@@ -852,8 +852,8 @@ STAGE PLANS:
                      2 UDFToDouble(key) (type: double)
                    outputColumnNames: _col0, _col6, _col11
                    input vertices:
-                      0 Map 3
-                      2 Map 1
+                      0 Map 1
+                      2 Map 3
                    Position of Big Table: 1
                    Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
diff --git a/ql/src/test/results/clientpositive/spark/join_merge_multi_expressions.q.out b/ql/src/test/results/clientpositive/spark/join_merge_multi_expressions.q.out
index 1f42214..057c0f7 100644
--- a/ql/src/test/results/clientpositive/spark/join_merge_multi_expressions.q.out
+++ b/ql/src/test/results/clientpositive/spark/join_merge_multi_expressions.q.out
@@ -14,7 +14,7 @@ STAGE PLANS:
     Spark
 #### A masked pattern was here ####
       Vertices:
-        Map 1
+        Map 3
            Map Operator Tree:
               TableScan
                 alias: b
@@ -33,7 +33,7 @@ STAGE PLANS:
                       2 key (type: string), hr (type: string)
            Local Work:
              Map Reduce Local Work
-        Map 2
+        Map 4
            Map Operator Tree:
               TableScan
                 alias: c
@@ -56,10 +56,10 @@ STAGE PLANS:
   Stage: Stage-1
     Spark
       Edges:
-        Reducer
4 <- Map 3 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: a @@ -80,23 +80,21 @@ STAGE PLANS: 1 key (type: string), hr (type: string) 2 key (type: string), hr (type: string) input vertices: - 1 Map 1 - 2 Map 2 + 1 Map 3 + 2 Map 4 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work - Reducer 4 + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/spark/join_merging.q.out b/ql/src/test/results/clientpositive/spark/join_merging.q.out index ce4cd8f..a67f8b8 100644 --- a/ql/src/test/results/clientpositive/spark/join_merging.q.out +++ b/ql/src/test/results/clientpositive/spark/join_merging.q.out @@ -18,23 +18,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 - Map Operator Tree: - TableScan - alias: p2 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {p_size} - 1 {p_size} - 2 - keys: - 0 p_partkey (type: int) - 1 p_partkey (type: int) - 2 p_partkey (type: int) - Local Work: - Map Reduce Local Work - Map 3 + Map 1 Map Operator Tree: TableScan alias: p1 @@ -53,12 +37,28 @@ STAGE PLANS: 2 p_partkey (type: int) Local Work: Map Reduce Local Work + Map 2 + Map Operator Tree: + TableScan + alias: p2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {p_size} + 1 {p_size} + 2 + keys: + 0 p_partkey (type: int) + 1 p_partkey (type: int) + 2 p_partkey (type: int) + Local Work: + Map Reduce Local Work Stage: Stage-1 Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: p3 @@ -77,7 +77,7 @@ STAGE PLANS: 2 p_partkey (type: int) outputColumnNames: _col5, _col17 input vertices: - 0 Map 3 + 0 Map 1 1 Map 2 Statistics: Num rows: 57 Data size: 6923 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -120,23 +120,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 - Map Operator Tree: - TableScan - alias: p2 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {p_size} - 1 {p_size} - 2 - keys: - 0 p_partkey (type: int) - 1 p_partkey (type: int) - 2 p_partkey (type: int) - Local Work: - Map Reduce Local Work - Map 3 + Map 1 Map Operator Tree: TableScan alias: p1 @@ -155,12 +139,28 @@ STAGE PLANS: 2 p_partkey (type: int) Local Work: Map Reduce Local Work + Map 2 + Map Operator Tree: + TableScan + alias: p2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition 
expressions: + 0 {p_size} + 1 {p_size} + 2 + keys: + 0 p_partkey (type: int) + 1 p_partkey (type: int) + 2 p_partkey (type: int) + Local Work: + Map Reduce Local Work Stage: Stage-1 Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: p3 @@ -179,7 +179,7 @@ STAGE PLANS: 2 p_partkey (type: int) outputColumnNames: _col5, _col17 input vertices: - 0 Map 3 + 0 Map 1 1 Map 2 Statistics: Num rows: 57 Data size: 6923 Basic stats: COMPLETE Column stats: NONE Filter Operator diff --git a/ql/src/test/results/clientpositive/spark/join_nullsafe.q.out b/ql/src/test/results/clientpositive/spark/join_nullsafe.q.out index 122f290..8ba653d 100644 --- a/ql/src/test/results/clientpositive/spark/join_nullsafe.q.out +++ b/ql/src/test/results/clientpositive/spark/join_nullsafe.q.out @@ -34,7 +34,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -53,7 +53,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -70,7 +70,7 @@ STAGE PLANS: nullSafes: [true] outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 3 Data size: 28 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int) @@ -125,7 +125,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -144,7 +144,7 @@ STAGE PLANS: 2 key (type: int) Local Work: Map Reduce Local Work - Map 2 + Map 3 Map Operator Tree: TableScan alias: c @@ -168,7 +168,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: a @@ -190,8 +190,8 @@ STAGE PLANS: 2 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: - 1 Map 1 - 2 Map 2 + 1 Map 2 + 2 Map 3 Statistics: Num rows: 4 Data size: 37 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) @@ -237,7 +237,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -253,7 +253,7 @@ STAGE PLANS: 2 key (type: int) Local Work: Map Reduce Local Work - Map 2 + Map 3 Map Operator Tree: TableScan alias: c @@ -274,7 +274,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: a @@ -294,8 +294,8 @@ STAGE PLANS: nullSafes: [true] outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: - 1 Map 1 - 2 Map 2 + 1 Map 2 + 2 Map 3 Statistics: Num rows: 6 Data size: 57 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) @@ -368,7 +368,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -387,7 +387,7 @@ STAGE PLANS: 2 key (type: int), value (type: int) Local Work: Map Reduce Local Work - Map 2 + Map 3 Map Operator Tree: TableScan alias: c @@ -411,7 +411,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: a @@ -434,8 +434,8 @@ STAGE PLANS: nullSafes: [true, false] outputColumnNames: _col0, _col1, 
_col5, _col6, _col10, _col11 input vertices: - 1 Map 1 - 2 Map 2 + 1 Map 2 + 2 Map 3 Statistics: Num rows: 4 Data size: 37 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) @@ -481,7 +481,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -497,7 +497,7 @@ STAGE PLANS: 2 key (type: int), value (type: int) Local Work: Map Reduce Local Work - Map 2 + Map 3 Map Operator Tree: TableScan alias: c @@ -518,7 +518,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: a @@ -538,8 +538,8 @@ STAGE PLANS: nullSafes: [true, true] outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: - 1 Map 1 - 2 Map 2 + 1 Map 2 + 2 Map 3 Statistics: Num rows: 6 Data size: 57 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) @@ -1638,7 +1638,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -1660,7 +1660,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -1680,7 +1680,7 @@ STAGE PLANS: nullSafes: [true] outputColumnNames: _col1, _col5 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: null (type: void), _col1 (type: int), _col5 (type: int), null (type: void) diff --git a/ql/src/test/results/clientpositive/spark/join_rc.q.out b/ql/src/test/results/clientpositive/spark/join_rc.q.out index a10b8ad..6eb98f3 100644 --- a/ql/src/test/results/clientpositive/spark/join_rc.q.out +++ b/ql/src/test/results/clientpositive/spark/join_rc.q.out @@ -56,7 +56,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: join_rc2 @@ -78,7 +78,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: join_rc1 @@ -97,7 +97,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col6 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 275 Data size: 2646 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col6 (type: string) diff --git a/ql/src/test/results/clientpositive/spark/join_reorder.q.out b/ql/src/test/results/clientpositive/spark/join_reorder.q.out index 54522f9..2b8b233 100644 --- a/ql/src/test/results/clientpositive/spark/join_reorder.q.out +++ b/ql/src/test/results/clientpositive/spark/join_reorder.q.out @@ -66,7 +66,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -88,7 +88,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: c @@ -107,7 +107,7 @@ STAGE PLANS: 1 (key + 1) (type: double) outputColumnNames: _col0, _col1, _col5 input vertices: - 0 Map 2 + 0 Map 1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) @@ -145,7 +145,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map 
Operator Tree: TableScan alias: a @@ -167,7 +167,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: c @@ -186,7 +186,7 @@ STAGE PLANS: 1 (key + 1) (type: double) outputColumnNames: _col0, _col1, _col5 input vertices: - 0 Map 2 + 0 Map 1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) @@ -261,7 +261,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -280,7 +280,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: a @@ -296,7 +296,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col1, _col5 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 0 Data size: 33 Basic stats: PARTIAL Column stats: NONE Spark HashTable Sink Operator condition expressions: @@ -312,7 +312,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: c @@ -328,7 +328,7 @@ STAGE PLANS: 1 val (type: string) outputColumnNames: _col0, _col1, _col5, _col11 input vertices: - 0 Map 3 + 0 Map 1 Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: _col0 (type: string), _col5 (type: string), _col1 (type: string), _col11 (type: string) @@ -371,7 +371,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -390,7 +390,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: a @@ -406,7 +406,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col1, _col5 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 0 Data size: 33 Basic stats: PARTIAL Column stats: NONE Spark HashTable Sink Operator condition expressions: @@ -422,7 +422,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: c @@ -438,7 +438,7 @@ STAGE PLANS: 1 val (type: string) outputColumnNames: _col0, _col1, _col5, _col11 input vertices: - 0 Map 3 + 0 Map 1 Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: _col0 (type: string), _col5 (type: string), _col1 (type: string), _col11 (type: string) @@ -530,7 +530,7 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: key (type: string), val (type: string) @@ -540,23 +540,23 @@ STAGE PLANS: Map 3 Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: key (type: string), val (type: string) sort order: ++ Map-reduce partition columns: key (type: string), val (type: string) - Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Map 4 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: c + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE Reduce 
Output Operator key expressions: key (type: string), val (type: string) sort order: ++ Map-reduce partition columns: key (type: string), val (type: string) - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -614,7 +614,7 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: key (type: string), val (type: string) @@ -624,23 +624,23 @@ STAGE PLANS: Map 3 Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: key (type: string), val (type: string) sort order: ++ Map-reduce partition columns: key (type: string), val (type: string) - Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Map 4 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: c + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: key (type: string), val (type: string) sort order: ++ Map-reduce partition columns: key (type: string), val (type: string) - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator diff --git a/ql/src/test/results/clientpositive/spark/join_reorder2.q.out b/ql/src/test/results/clientpositive/spark/join_reorder2.q.out index 0ed7e91..c5135d5 100644 --- a/ql/src/test/results/clientpositive/spark/join_reorder2.q.out +++ b/ql/src/test/results/clientpositive/spark/join_reorder2.q.out @@ -84,10 +84,10 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: d + alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -95,9 +95,9 @@ STAGE PLANS: Spark HashTable Sink Operator condition expressions: 0 {key} {val} - 1 {key} {val} + 1 {val} 2 {key} {val} - 3 {val} + 3 {key} {val} keys: 0 key (type: string) 1 key (type: string) @@ -105,19 +105,19 @@ STAGE PLANS: 3 key (type: string) Local Work: Map Reduce Local Work - Map 2 + Map 3 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: c + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: 0 {key} {val} - 1 {val} - 2 {key} {val} + 1 {key} {val} + 2 {val} 3 {key} {val} keys: 0 key (type: string) @@ -126,11 +126,11 @@ STAGE PLANS: 3 key (type: string) Local Work: Map Reduce Local Work - Map 3 + Map 4 Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + alias: d + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: key is not null (type: 
boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE @@ -138,8 +138,8 @@ STAGE PLANS: condition expressions: 0 {key} {val} 1 {key} {val} - 2 {val} - 3 {key} {val} + 2 {key} {val} + 3 {val} keys: 0 key (type: string) 1 key (type: string) @@ -152,7 +152,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 4 + Map 1 Map Operator Tree: TableScan alias: a @@ -179,7 +179,7 @@ STAGE PLANS: input vertices: 1 Map 2 2 Map 3 - 3 Map 1 + 3 Map 4 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) @@ -244,23 +244,6 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: d - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: (key + 1) is not null (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} {_col5} {_col6} {_col10} {_col11} - 1 {key} {val} - keys: - 0 (_col0 + 1) (type: double) - 1 (key + 1) (type: double) - Local Work: - Map Reduce Local Work Map 2 Map Operator Tree: TableScan @@ -295,12 +278,29 @@ STAGE PLANS: 1 val (type: string) Local Work: Map Reduce Local Work + Map 4 + Map Operator Tree: + TableScan + alias: d + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (key + 1) is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} {_col5} {_col6} {_col10} {_col11} + 1 {key} {val} + keys: + 0 (_col0 + 1) (type: double) + 1 (key + 1) (type: double) + Local Work: + Map Reduce Local Work Stage: Stage-1 Spark #### A masked pattern was here #### Vertices: - Map 4 + Map 1 Map Operator Tree: TableScan alias: a @@ -345,7 +345,7 @@ STAGE PLANS: 1 (key + 1) (type: double) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 input vertices: - 1 Map 1 + 1 Map 4 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) diff --git a/ql/src/test/results/clientpositive/spark/join_reorder3.q.out b/ql/src/test/results/clientpositive/spark/join_reorder3.q.out index 6dca7d4..98dc1a7 100644 --- a/ql/src/test/results/clientpositive/spark/join_reorder3.q.out +++ b/ql/src/test/results/clientpositive/spark/join_reorder3.q.out @@ -84,10 +84,10 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: d + alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -95,9 +95,9 @@ STAGE PLANS: Spark HashTable Sink Operator condition expressions: 0 {key} {val} - 1 {key} {val} + 1 {val} 2 {key} {val} - 3 {val} + 3 {key} {val} keys: 0 key (type: string) 1 key (type: string) @@ -105,19 +105,19 @@ STAGE PLANS: 3 key (type: string) Local Work: Map Reduce Local Work - Map 2 + Map 3 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 0 
Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: c + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: 0 {key} {val} - 1 {val} - 2 {key} {val} + 1 {key} {val} + 2 {val} 3 {key} {val} keys: 0 key (type: string) @@ -126,11 +126,11 @@ STAGE PLANS: 3 key (type: string) Local Work: Map Reduce Local Work - Map 3 + Map 4 Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + alias: d + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE @@ -138,8 +138,8 @@ STAGE PLANS: condition expressions: 0 {key} {val} 1 {key} {val} - 2 {val} - 3 {key} {val} + 2 {key} {val} + 3 {val} keys: 0 key (type: string) 1 key (type: string) @@ -152,7 +152,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 4 + Map 1 Map Operator Tree: TableScan alias: a @@ -179,7 +179,7 @@ STAGE PLANS: input vertices: 1 Map 2 2 Map 3 - 3 Map 1 + 3 Map 4 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) @@ -244,23 +244,6 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: d - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: (key + 1) is not null (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} {_col5} {_col6} {_col10} {_col11} - 1 {key} {val} - keys: - 0 (_col0 + 1) (type: double) - 1 (key + 1) (type: double) - Local Work: - Map Reduce Local Work Map 2 Map Operator Tree: TableScan @@ -295,12 +278,29 @@ STAGE PLANS: 1 val (type: string) Local Work: Map Reduce Local Work + Map 4 + Map Operator Tree: + TableScan + alias: d + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (key + 1) is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} {_col5} {_col6} {_col10} {_col11} + 1 {key} {val} + keys: + 0 (_col0 + 1) (type: double) + 1 (key + 1) (type: double) + Local Work: + Map Reduce Local Work Stage: Stage-1 Spark #### A masked pattern was here #### Vertices: - Map 4 + Map 1 Map Operator Tree: TableScan alias: a @@ -345,7 +345,7 @@ STAGE PLANS: 1 (key + 1) (type: double) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 input vertices: - 1 Map 1 + 1 Map 4 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) diff --git a/ql/src/test/results/clientpositive/spark/join_reorder4.q.out b/ql/src/test/results/clientpositive/spark/join_reorder4.q.out index 92096a4..be44b5a 100644 
--- a/ql/src/test/results/clientpositive/spark/join_reorder4.q.out +++ b/ql/src/test/results/clientpositive/spark/join_reorder4.q.out @@ -60,7 +60,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -79,7 +79,7 @@ STAGE PLANS: 2 key3 (type: string) Local Work: Map Reduce Local Work - Map 2 + Map 3 Map Operator Tree: TableScan alias: c @@ -103,7 +103,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: a @@ -125,8 +125,8 @@ STAGE PLANS: 2 key3 (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: - 1 Map 1 - 2 Map 2 + 1 Map 2 + 2 Map 3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) @@ -175,7 +175,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -194,7 +194,7 @@ STAGE PLANS: 2 key3 (type: string) Local Work: Map Reduce Local Work - Map 2 + Map 3 Map Operator Tree: TableScan alias: c @@ -218,7 +218,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: a @@ -240,8 +240,8 @@ STAGE PLANS: 2 key3 (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: - 1 Map 1 - 2 Map 2 + 1 Map 2 + 2 Map 3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) @@ -290,7 +290,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -309,7 +309,7 @@ STAGE PLANS: 2 key3 (type: string) Local Work: Map Reduce Local Work - Map 2 + Map 3 Map Operator Tree: TableScan alias: c @@ -333,7 +333,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: a @@ -355,8 +355,8 @@ STAGE PLANS: 2 key3 (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: - 1 Map 1 - 2 Map 2 + 1 Map 2 + 2 Map 3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) diff --git a/ql/src/test/results/clientpositive/spark/join_star.q.out b/ql/src/test/results/clientpositive/spark/join_star.q.out index 0c1e285..38707b9 100644 --- a/ql/src/test/results/clientpositive/spark/join_star.q.out +++ b/ql/src/test/results/clientpositive/spark/join_star.q.out @@ -140,7 +140,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: dim1 @@ -162,7 +162,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: fact @@ -181,7 +181,7 @@ STAGE PLANS: 1 f1 (type: int) outputColumnNames: _col0, _col1, _col8 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 4 Data size: 53 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int), _col8 (type: int) @@ -237,38 +237,38 @@ STAGE PLANS: Spark #### A masked pattern was here #### 
Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: dim2 + alias: dim1 Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: f3 is not null (type: boolean) + predicate: f1 is not null (type: boolean) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {_col0} {_col1} {_col8} - 1 {f4} + 0 {m1} {m2} {d2} + 1 {f2} keys: - 0 _col3 (type: int) - 1 f3 (type: int) + 0 d1 (type: int) + 1 f1 (type: int) Local Work: Map Reduce Local Work - Map 2 + Map 3 Map Operator Tree: TableScan - alias: dim1 + alias: dim2 Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: f1 is not null (type: boolean) + predicate: f3 is not null (type: boolean) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {m1} {m2} {d2} - 1 {f2} + 0 {_col0} {_col1} {_col8} + 1 {f4} keys: - 0 d1 (type: int) - 1 f1 (type: int) + 0 _col3 (type: int) + 1 f3 (type: int) Local Work: Map Reduce Local Work @@ -276,7 +276,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: fact @@ -308,7 +308,7 @@ STAGE PLANS: 1 f3 (type: int) outputColumnNames: _col0, _col1, _col8, _col13 input vertices: - 1 Map 1 + 1 Map 3 Statistics: Num rows: 2 Data size: 38 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int), _col8 (type: int), _col13 (type: int) @@ -366,38 +366,38 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: dim2 + alias: dim1 Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: f3 is not null (type: boolean) + predicate: (f1 is not null and f2 is not null) (type: boolean) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {_col0} {_col1} {_col8} - 1 {f4} + 0 {m1} {m2} + 1 {f2} keys: - 0 _col8 (type: int) - 1 f3 (type: int) + 0 d1 (type: int) + 1 f1 (type: int) Local Work: Map Reduce Local Work - Map 2 + Map 3 Map Operator Tree: TableScan - alias: dim1 + alias: dim2 Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (f1 is not null and f2 is not null) (type: boolean) + predicate: f3 is not null (type: boolean) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {m1} {m2} - 1 {f2} + 0 {_col0} {_col1} {_col8} + 1 {f4} keys: - 0 d1 (type: int) - 1 f1 (type: int) + 0 _col8 (type: int) + 1 f3 (type: int) Local Work: Map Reduce Local Work @@ -405,7 +405,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: fact @@ -437,7 +437,7 @@ STAGE PLANS: 1 f3 (type: int) outputColumnNames: _col0, _col1, _col8, _col13 input vertices: - 1 Map 1 + 1 Map 3 Statistics: Num rows: 4 Data size: 58 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int), _col8 (type: int), _col13 (type: int) @@ -495,32 +495,32 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: dim2 + alias: dim1 Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE 
Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {_col0} {_col1} {_col8} - 1 {f4} + 0 {m1} {m2} + 1 {f2} keys: - 0 _col8 (type: int) - 1 f3 (type: int) + 0 d1 (type: int) + 1 f1 (type: int) Local Work: Map Reduce Local Work - Map 2 + Map 3 Map Operator Tree: TableScan - alias: dim1 + alias: dim2 Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {m1} {m2} - 1 {f2} + 0 {_col0} {_col1} {_col8} + 1 {f4} keys: - 0 d1 (type: int) - 1 f1 (type: int) + 0 _col8 (type: int) + 1 f3 (type: int) Local Work: Map Reduce Local Work @@ -528,7 +528,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: fact @@ -557,7 +557,7 @@ STAGE PLANS: 1 f3 (type: int) outputColumnNames: _col0, _col1, _col8, _col13 input vertices: - 1 Map 1 + 1 Map 3 Statistics: Num rows: 8 Data size: 117 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int), _col8 (type: int), _col13 (type: int) @@ -631,21 +631,21 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: dim3 + alias: dim1 Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {_col0} {_col1} {_col8} {_col13} - 1 {f6} + 0 {m1} {m2} {d2} + 1 {f2} keys: - 0 _col3 (type: int) - 1 f5 (type: int) + 0 d1 (type: int) + 1 f1 (type: int) Local Work: Map Reduce Local Work - Map 2 + Map 3 Map Operator Tree: TableScan alias: dim2 @@ -659,38 +659,24 @@ STAGE PLANS: 1 f3 (type: int) Local Work: Map Reduce Local Work - Map 3 + Map 4 Map Operator Tree: TableScan - alias: dim1 + alias: dim3 Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {m1} {m2} {d2} - 1 {f2} + 0 {_col0} {_col1} {_col8} {_col13} + 1 {f6} keys: - 0 d1 (type: int) - 1 f1 (type: int) + 0 _col3 (type: int) + 1 f5 (type: int) Local Work: Map Reduce Local Work Map 5 Map Operator Tree: TableScan - alias: dim7 - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} {_col8} {_col13} {_col18} {_col23} {_col28} {_col33} - 1 {f14} - keys: - 0 _col28 (type: int) - 1 f13 (type: int) - Local Work: - Map Reduce Local Work - Map 6 - Map Operator Tree: - TableScan - alias: dim6 + alias: dim4 Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: @@ -703,7 +689,7 @@ STAGE PLANS: 2 f11 (type: int) Local Work: Map Reduce Local Work - Map 7 + Map 6 Map Operator Tree: TableScan alias: dim5 @@ -717,10 +703,10 @@ STAGE PLANS: 1 f9 (type: int) Local Work: Map Reduce Local Work - Map 8 + Map 7 Map Operator Tree: TableScan - alias: dim4 + alias: dim6 Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: @@ -733,12 +719,26 @@ STAGE PLANS: 2 f11 (type: int) Local Work: Map Reduce Local Work + Map 8 + Map Operator Tree: + TableScan + alias: dim7 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} {_col8} {_col13} {_col18} {_col23} {_col28} {_col33} + 1 {f14} + keys: + 0 _col28 (type: int) + 1 f13 (type: int) + Local Work: + Map Reduce 
Local Work Stage: Stage-1 Spark #### A masked pattern was here #### Vertices: - Map 4 + Map 1 Map Operator Tree: TableScan alias: fact @@ -754,7 +754,7 @@ STAGE PLANS: 1 f1 (type: int) outputColumnNames: _col0, _col1, _col3, _col8 input vertices: - 1 Map 3 + 1 Map 2 Statistics: Num rows: 6 Data size: 107 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -767,7 +767,7 @@ STAGE PLANS: 1 f3 (type: int) outputColumnNames: _col0, _col1, _col3, _col8, _col13 input vertices: - 1 Map 2 + 1 Map 3 Statistics: Num rows: 6 Data size: 117 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -780,7 +780,7 @@ STAGE PLANS: 1 f5 (type: int) outputColumnNames: _col0, _col1, _col8, _col13, _col18 input vertices: - 1 Map 1 + 1 Map 4 Statistics: Num rows: 6 Data size: 128 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -796,8 +796,8 @@ STAGE PLANS: 2 f11 (type: int) outputColumnNames: _col0, _col1, _col8, _col13, _col18, _col23, _col28 input vertices: - 1 Map 8 - 2 Map 6 + 1 Map 5 + 2 Map 7 Statistics: Num rows: 13 Data size: 281 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -810,7 +810,7 @@ STAGE PLANS: 1 f9 (type: int) outputColumnNames: _col0, _col1, _col8, _col13, _col18, _col23, _col28, _col33 input vertices: - 1 Map 7 + 1 Map 6 Statistics: Num rows: 14 Data size: 309 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -823,7 +823,7 @@ STAGE PLANS: 1 f13 (type: int) outputColumnNames: _col0, _col1, _col8, _col13, _col18, _col23, _col28, _col33, _col38 input vertices: - 1 Map 5 + 1 Map 8 Statistics: Num rows: 15 Data size: 339 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int), _col8 (type: int), _col13 (type: int), _col18 (type: int), _col23 (type: int), _col33 (type: int), _col28 (type: int), _col38 (type: int) diff --git a/ql/src/test/results/clientpositive/spark/join_thrift.q.out b/ql/src/test/results/clientpositive/spark/join_thrift.q.out index 81059d8..11ffd70 100644 --- a/ql/src/test/results/clientpositive/spark/join_thrift.q.out +++ b/ql/src/test/results/clientpositive/spark/join_thrift.q.out @@ -40,7 +40,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: s2 @@ -62,7 +62,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: s1 @@ -81,7 +81,7 @@ STAGE PLANS: 1 aint (type: int) outputColumnNames: _col0, _col17 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 6 Data size: 1841 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col17 (type: array<struct<myint:int,mystring:string,underscore_int:int>>) diff --git a/ql/src/test/results/clientpositive/spark/join_vc.q.out b/ql/src/test/results/clientpositive/spark/join_vc.q.out index f69fa11..caa23fe 100644 --- a/ql/src/test/results/clientpositive/spark/join_vc.q.out +++ b/ql/src/test/results/clientpositive/spark/join_vc.q.out @@ -20,48 +20,48 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan - alias: t3 + alias: t2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE
Column stats: NONE Spark HashTable Sink Operator condition expressions: 0 - 1 {key} {BLOCK__OFFSET__INSIDE__FILE} + 1 {value} keys: - 0 _col6 (type: string) - 1 value (type: string) + 0 key (type: string) + 1 key (type: string) Local Work: Map Reduce Local Work - Map 2 + Map 4 Map Operator Tree: TableScan - alias: t2 + alias: t3 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + predicate: value is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: 0 - 1 {value} + 1 {key} {BLOCK__OFFSET__INSIDE__FILE} keys: - 0 key (type: string) - 1 key (type: string) + 0 _col6 (type: string) + 1 value (type: string) Local Work: Map Reduce Local Work Stage: Stage-1 Spark Edges: - Reducer 4 <- Map 3 (SORT, 1) + Reducer 2 <- Map 1 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: t1 @@ -80,7 +80,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col6 input vertices: - 1 Map 2 + 1 Map 3 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -93,7 +93,7 @@ STAGE PLANS: 1 value (type: string) outputColumnNames: _col10, _col11, _col12 input vertices: - 1 Map 1 + 1 Map 4 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col12 (type: bigint), _col10 (type: string), _col11 (type: string) @@ -105,7 +105,7 @@ STAGE PLANS: Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Reducer 4 + Reducer 2 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string) @@ -157,7 +157,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: t2 @@ -179,7 +179,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: t1 @@ -198,7 +198,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col7 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col7 (type: bigint) diff --git a/ql/src/test/results/clientpositive/spark/join_view.q.out b/ql/src/test/results/clientpositive/spark/join_view.q.out index 2d94c12..b11b855 100644 --- a/ql/src/test/results/clientpositive/spark/join_view.q.out +++ b/ql/src/test/results/clientpositive/spark/join_view.q.out @@ -52,7 +52,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Local Work: Map Reduce Local Work @@ -60,7 +60,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Local Work: Map Reduce Local Work diff --git a/ql/src/test/results/clientpositive/spark/limit_pushdown.q.out b/ql/src/test/results/clientpositive/spark/limit_pushdown.q.out index d8443fb..4d1013d 100644 --- a/ql/src/test/results/clientpositive/spark/limit_pushdown.q.out +++ b/ql/src/test/results/clientpositive/spark/limit_pushdown.q.out @@ -727,23 +727,19 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data 
size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: double) - sort order: + - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.3 - value expressions: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: double) + sort order: + + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.3 + value expressions: _col0 (type: string) Reducer 3 Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: double) outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 20 Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE @@ -849,23 +845,19 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 2 - Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col1} - 1 {_col0} {_col1} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) + Limit + Number of rows: 2 + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col1} + 1 {_col0} {_col1} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) Stage: Stage-1 Spark @@ -904,18 +896,14 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 3 + Limit + Number of rows: 3 + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.3 - value expressions: _col0 (type: string), _col1 (type: bigint) + TopN Hash Memory Usage: 0.3 + value expressions: _col0 (type: string), _col1 (type: bigint) Reducer 5 Local Work: Map Reduce Local Work diff --git a/ql/src/test/results/clientpositive/spark/load_dyn_part13.q.out b/ql/src/test/results/clientpositive/spark/load_dyn_part13.q.out index 
81d06f4..92693e6 100644 --- a/ql/src/test/results/clientpositive/spark/load_dyn_part13.q.out +++ b/ql/src/test/results/clientpositive/spark/load_dyn_part13.q.out @@ -73,39 +73,33 @@ STAGE PLANS: TableScan alias: src Filter Operator - predicate: ((key > 20) and (key < 40)) (type: boolean) + predicate: (key < 20) (type: boolean) Select Operator - expressions: key (type: string), value (type: string), '33' (type: string) + expressions: key (type: string), value (type: string), '22' (type: string) outputColumnNames: _col0, _col1, _col2 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part13 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.nzhang_part13 Map 3 Map Operator Tree: TableScan alias: src Filter Operator - predicate: (key < 20) (type: boolean) + predicate: ((key > 20) and (key < 40)) (type: boolean) Select Operator - expressions: key (type: string), value (type: string), '22' (type: string) + expressions: key (type: string), value (type: string), '33' (type: string) outputColumnNames: _col0, _col1, _col2 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part13 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.nzhang_part13 Union 2 Vertex: Union 2 diff --git a/ql/src/test/results/clientpositive/spark/load_dyn_part14.q.out b/ql/src/test/results/clientpositive/spark/load_dyn_part14.q.out index edc4984..060745d 100644 --- a/ql/src/test/results/clientpositive/spark/load_dyn_part14.q.out +++ b/ql/src/test/results/clientpositive/spark/load_dyn_part14.q.out @@ -74,7 +74,7 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: 'k2' (type: string), '' (type: string) + expressions: 'k1' (type: string), UDFToString(null) (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 85000 Basic stats: COMPLETE Column stats: COMPLETE Limit @@ -90,15 +90,15 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: 'k3' (type: string), ' ' (type: string) + expressions: 'k2' (type: string), '' (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 85500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 85000 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 2 - Statistics: Num rows: 2 Data size: 342 Basic 
stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 2 Data size: 342 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: string) Map 6 Map Operator Tree: @@ -106,15 +106,15 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: 'k1' (type: string), UDFToString(null) (type: string) + expressions: 'k3' (type: string), ' ' (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 85000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 85500 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 2 - Statistics: Num rows: 2 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 342 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 2 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 342 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: string) Reducer 2 Reduce Operator Tree: @@ -123,16 +123,13 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Limit Number of rows: 2 - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part14 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.nzhang_part14 Reducer 5 Reduce Operator Tree: Select Operator @@ -140,16 +137,13 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Limit Number of rows: 2 - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part14 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.nzhang_part14 Reducer 7 Reduce Operator Tree: Select Operator @@ -157,16 +151,13 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Limit Number of rows: 2 - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part14 + File Output Operator + compressed: false + table: + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.nzhang_part14 Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/spark/louter_join_ppr.q.out b/ql/src/test/results/clientpositive/spark/louter_join_ppr.q.out index 78ce909..196deef 100644 --- a/ql/src/test/results/clientpositive/spark/louter_join_ppr.q.out +++ b/ql/src/test/results/clientpositive/spark/louter_join_ppr.q.out @@ -114,7 +114,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -237,7 +237,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -258,7 +258,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 1 Map 1 + 1 Map 2 Position of Big Table: 0 Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -498,7 +498,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -577,7 +577,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -603,7 +603,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col1, _col7, _col8 input vertices: - 1 Map 1 + 1 Map 2 Position of Big Table: 0 Statistics: Num rows: 244 Data size: 2593 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -990,7 +990,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -1207,7 +1207,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -1228,7 +1228,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col7 input vertices: - 1 Map 1 + 1 Map 2 Position of Big Table: 0 Statistics: Num rows: 244 Data size: 2593 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -1472,7 +1472,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -1546,7 +1546,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -1567,7 +1567,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col1, _col7, _col8 input vertices: - 1 Map 1 + 1 Map 2 Position of Big Table: 0 Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE Filter Operator diff --git a/ql/src/test/results/clientpositive/spark/mapjoin1.q.out b/ql/src/test/results/clientpositive/spark/mapjoin1.q.out index 9632138..0e5907c 100644 --- a/ql/src/test/results/clientpositive/spark/mapjoin1.q.out +++ b/ql/src/test/results/clientpositive/spark/mapjoin1.q.out @@ -39,7 +39,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -61,7 +61,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -80,7 +80,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 0 Map 2 + 0 Map 1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), 
_col5 (type: string), _col6 (type: string) @@ -141,7 +141,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -163,7 +163,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -182,7 +182,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 0 Map 2 + 0 Map 1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) @@ -245,7 +245,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: a @@ -267,7 +267,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src @@ -290,7 +290,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 0 Map 1 + 0 Map 2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: struct) @@ -351,7 +351,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -370,7 +370,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -386,7 +386,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 0 Map 2 + 0 Map 1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) @@ -445,7 +445,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -467,7 +467,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -486,7 +486,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 0 Map 2 + 0 Map 1 Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) @@ -547,7 +547,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: a @@ -566,7 +566,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src @@ -589,7 +589,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 0 Map 1 + 0 Map 2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: struct) diff --git a/ql/src/test/results/clientpositive/spark/mapjoin_decimal.q.out b/ql/src/test/results/clientpositive/spark/mapjoin_decimal.q.out index 6aa41e0..2cf9772 100644 --- a/ql/src/test/results/clientpositive/spark/mapjoin_decimal.q.out +++ b/ql/src/test/results/clientpositive/spark/mapjoin_decimal.q.out @@ -92,7 +92,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan 
alias: t2 @@ -114,7 +114,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: t1 @@ -133,7 +133,7 @@ STAGE PLANS: 1 dec (type: decimal(4,0)) outputColumnNames: _col0, _col4 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 577 Data size: 64680 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: decimal(4,2)), _col4 (type: decimal(4,0)) diff --git a/ql/src/test/results/clientpositive/spark/mapjoin_distinct.q.out b/ql/src/test/results/clientpositive/spark/mapjoin_distinct.q.out index 2096a8a..fcc8d6a 100644 --- a/ql/src/test/results/clientpositive/spark/mapjoin_distinct.q.out +++ b/ql/src/test/results/clientpositive/spark/mapjoin_distinct.q.out @@ -20,7 +20,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: d @@ -41,11 +41,11 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: c @@ -64,25 +64,21 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col1 input vertices: - 1 Map 1 + 1 Map 4 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col1 + Group By Operator + keys: _col1 (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Reducer 3 + Reducer 2 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) @@ -94,7 +90,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reducer 4 + Reducer 3 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) @@ -169,7 +165,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: d @@ -190,10 +186,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 3) + Reducer 2 <- Map 1 (GROUP, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: c @@ -212,25 +208,21 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col1 input vertices: - 1 Map 1 + 1 Map 3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col1 + Group By Operator + keys: _col1 (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key 
expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Reducer 3 + Reducer 2 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) @@ -305,7 +297,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: d @@ -326,11 +318,11 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP PARTITION-LEVEL SORT, 3) - Reducer 4 <- Reducer 3 (GROUP, 3) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (GROUP, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: c @@ -349,20 +341,16 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col1 input vertices: - 1 Map 1 + 1 Map 4 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col1 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Reducer 3 + Reducer 2 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) @@ -374,7 +362,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reducer 4 + Reducer 3 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) @@ -449,7 +437,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: d @@ -470,10 +458,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 3) + Reducer 2 <- Map 1 (GROUP, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: c @@ -492,20 +480,16 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col1 input vertices: - 1 Map 1 + 1 Map 3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col1 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Reducer 3 + Reducer 2 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) diff --git a/ql/src/test/results/clientpositive/spark/mapjoin_filter_on_outerjoin.q.out b/ql/src/test/results/clientpositive/spark/mapjoin_filter_on_outerjoin.q.out index b5e58d5..80e730e 100644 --- 
a/ql/src/test/results/clientpositive/spark/mapjoin_filter_on_outerjoin.q.out +++ b/ql/src/test/results/clientpositive/spark/mapjoin_filter_on_outerjoin.q.out @@ -65,15 +65,15 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key < 300) (type: boolean) - Statistics: Num rows: 8 Data size: 61 Basic stats: COMPLETE Column stats: NONE + predicate: ((key < 300) and (key < 10)) (type: boolean) + Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {key} {value} - 1 {value} + 0 {value} + 1 {key} {value} 2 {key} {value} filter predicates: 0 @@ -85,18 +85,18 @@ STAGE PLANS: 2 key (type: string) Local Work: Map Reduce Local Work - Map 4 + Map 2 Map Operator Tree: TableScan - alias: src1 + alias: src2 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key < 300) and (key < 10)) (type: boolean) - Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE + predicate: (key < 300) (type: boolean) + Statistics: Num rows: 8 Data size: 61 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {value} - 1 {key} {value} + 0 {key} {value} + 1 {value} 2 {key} {value} filter predicates: 0 @@ -112,10 +112,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (SORT, 3) + Reducer 4 <- Map 3 (SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: src3 @@ -141,8 +141,8 @@ STAGE PLANS: 2 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: - 0 Map 4 - 1 Map 1 + 0 Map 1 + 1 Map 2 Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) @@ -155,7 +155,7 @@ STAGE PLANS: value expressions: _col1 (type: string), _col3 (type: string), _col5 (type: string) Local Work: Map Reduce Local Work - Reducer 3 + Reducer 4 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col2 (type: string) @@ -238,15 +238,15 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key < 300) (type: boolean) - Statistics: Num rows: 8 Data size: 61 Basic stats: COMPLETE Column stats: NONE + predicate: ((key < 300) and (key < 10)) (type: boolean) + Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {key} {value} - 1 {value} + 0 {value} + 1 {key} {value} 2 {key} {value} filter predicates: 0 @@ -258,18 +258,18 @@ STAGE PLANS: 2 key (type: string) Local Work: Map Reduce Local Work - Map 4 + Map 2 Map Operator Tree: TableScan - alias: src1 + alias: src2 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key < 300) and (key < 10)) (type: boolean) - Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE + predicate: (key < 300) (type: boolean) + Statistics: Num 
rows: 8 Data size: 61 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {value} - 1 {key} {value} + 0 {key} {value} + 1 {value} 2 {key} {value} filter predicates: 0 @@ -285,10 +285,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (SORT, 3) + Reducer 4 <- Map 3 (SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: src3 @@ -314,8 +314,8 @@ STAGE PLANS: 2 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: - 0 Map 4 - 1 Map 1 + 0 Map 1 + 1 Map 2 Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) @@ -328,7 +328,7 @@ STAGE PLANS: value expressions: _col1 (type: string), _col3 (type: string), _col5 (type: string) Local Work: Map Reduce Local Work - Reducer 3 + Reducer 4 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col2 (type: string) diff --git a/ql/src/test/results/clientpositive/spark/mapjoin_hook.q.out b/ql/src/test/results/clientpositive/spark/mapjoin_hook.q.out index 2f0ac83..7aa8ce9 100644 --- a/ql/src/test/results/clientpositive/spark/mapjoin_hook.q.out +++ b/ql/src/test/results/clientpositive/spark/mapjoin_hook.q.out @@ -38,6 +38,8 @@ PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 PREHOOK: Output: default@dest1 +Status: Failed +Status: Failed [MapJoinCounter PostHook] COMMON_JOIN: 0 HINTED_MAPJOIN: 0 HINTED_MAPJOIN_LOCAL: 0 CONVERTED_MAPJOIN: 0 CONVERTED_MAPJOIN_LOCAL: 0 BACKUP_COMMON_JOIN: 0 RUN: Stage-3:MAPRED RUN: Stage-1:MAPRED @@ -48,6 +50,9 @@ INSERT OVERWRITE TABLE dest1 SELECT src1.key, src3.value PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@dest1 +Status: Failed +Status: Failed +Status: Failed [MapJoinCounter PostHook] COMMON_JOIN: 0 HINTED_MAPJOIN: 0 HINTED_MAPJOIN_LOCAL: 0 CONVERTED_MAPJOIN: 0 CONVERTED_MAPJOIN_LOCAL: 0 BACKUP_COMMON_JOIN: 0 RUN: Stage-4:MAPRED RUN: Stage-3:MAPRED diff --git a/ql/src/test/results/clientpositive/spark/mapjoin_mapjoin.q.out b/ql/src/test/results/clientpositive/spark/mapjoin_mapjoin.q.out index d929e98..a009cf8 100644 --- a/ql/src/test/results/clientpositive/spark/mapjoin_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/spark/mapjoin_mapjoin.q.out @@ -68,20 +68,20 @@ STAGE PLANS: Map 2 Map Operator Tree: TableScan - alias: src1 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + predicate: value is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {_col0} + 0 {key} 1 keys: - 0 _col0 (type: string) - 1 key (type: string) + 0 value (type: string) + 1 value (type: string) Position of Big Table: 0 Local Work: Map Reduce Local Work @@ -90,7 +90,7 @@ STAGE PLANS: 
Path -> Partition: #### A masked pattern was here #### Partition - base file name: src1 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -100,14 +100,14 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.src numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -120,37 +120,37 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.src numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src1 - name: default.src1 + name: default.src + name: default.src Truncated Path -> Alias: - /src1 [src1] + /src [src] Map 3 Map Operator Tree: TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: src1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: value is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + predicate: key is not null (type: boolean) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {key} + 0 {_col0} 1 keys: - 0 value (type: string) - 1 value (type: string) + 0 _col0 (type: string) + 1 key (type: string) Position of Big Table: 0 Local Work: Map Reduce Local Work @@ -159,7 +159,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: src1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -169,14 +169,14 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src + name default.src1 numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -189,20 +189,20 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src + name default.src1 numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, 
string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.src1 + name: default.src1 Truncated Path -> Alias: - /src [src] + /src1 [src1] Stage: Stage-1 Spark @@ -229,7 +229,7 @@ STAGE PLANS: 1 value (type: string) outputColumnNames: _col0 input vertices: - 1 Map 3 + 1 Map 2 Position of Big Table: 0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -243,7 +243,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0 input vertices: - 1 Map 2 + 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -491,35 +491,35 @@ STAGE PLANS: Map 2 Map Operator Tree: TableScan - alias: src1 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + predicate: (value > 'val_450') (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {_col0} + 0 {key} 1 keys: - 0 _col0 (type: string) - 1 key (type: string) + 0 value (type: string) + 1 value (type: string) Local Work: Map Reduce Local Work Map 3 Map Operator Tree: TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: src1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (value > 'val_450') (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + predicate: key is not null (type: boolean) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {key} + 0 {_col0} 1 keys: - 0 value (type: string) - 1 value (type: string) + 0 _col0 (type: string) + 1 key (type: string) Local Work: Map Reduce Local Work @@ -546,7 +546,7 @@ STAGE PLANS: 1 value (type: string) outputColumnNames: _col0 input vertices: - 1 Map 3 + 1 Map 2 Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -559,7 +559,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0 input vertices: - 1 Map 2 + 1 Map 3 Statistics: Num rows: 200 Data size: 2132 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string) @@ -600,35 +600,35 @@ STAGE PLANS: Map 3 Map Operator Tree: TableScan - alias: src1 + alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) + predicate: value is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {_col2} + 0 {key} {ds} 1 keys: - 0 _col0 (type: string) - 1 key (type: string) + 0 value (type: string) + 1 value (type: string) Local Work: Map Reduce Local Work Map 4 Map Operator Tree: TableScan - alias: src + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column 
stats: NONE Filter Operator - predicate: value is not null (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {key} {ds} + 0 {_col2} 1 keys: - 0 value (type: string) - 1 value (type: string) + 0 _col0 (type: string) + 1 key (type: string) Local Work: Map Reduce Local Work @@ -657,7 +657,7 @@ STAGE PLANS: 1 value (type: string) outputColumnNames: _col0, _col2 input vertices: - 1 Map 4 + 1 Map 3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -670,24 +670,20 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col2 input vertices: - 1 Map 3 + 1 Map 4 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string) - outputColumnNames: _col2 + Group By Operator + aggregations: count() + keys: _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col2 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Local Work: Map Reduce Local Work Reducer 2 diff --git a/ql/src/test/results/clientpositive/spark/mapjoin_memcheck.q.out b/ql/src/test/results/clientpositive/spark/mapjoin_memcheck.q.out index 942ba07..f01f641 100644 --- a/ql/src/test/results/clientpositive/spark/mapjoin_memcheck.q.out +++ b/ql/src/test/results/clientpositive/spark/mapjoin_memcheck.q.out @@ -38,7 +38,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: src2 @@ -60,7 +60,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src1 @@ -79,7 +79,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) diff --git a/ql/src/test/results/clientpositive/spark/mapjoin_subquery.q.out b/ql/src/test/results/clientpositive/spark/mapjoin_subquery.q.out index 0f242a9..ca0e150 100644 --- a/ql/src/test/results/clientpositive/spark/mapjoin_subquery.q.out +++ b/ql/src/test/results/clientpositive/spark/mapjoin_subquery.q.out @@ -88,34 +88,30 @@ STAGE PLANS: input vertices: 0 Map 1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} - 1 {value} - keys: - 0 _col0 (type: string) - 1 key (type: string) - outputColumnNames: 
_col0, _col5 - input vertices: - 1 Map 3 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 {value} + keys: + 0 _col0 (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col5 + input vertices: + 1 Map 3 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col5 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col5 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work @@ -338,34 +334,30 @@ STAGE PLANS: input vertices: 0 Map 1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} - 1 {value} - keys: - 0 _col0 (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col5 - input vertices: - 1 Map 3 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 {value} + keys: + 0 _col0 (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col5 + input vertices: + 1 Map 3 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col5 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col5 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work diff --git a/ql/src/test/results/clientpositive/spark/mapjoin_subquery2.q.out b/ql/src/test/results/clientpositive/spark/mapjoin_subquery2.q.out index 1a66028..23d99d8 100644 --- a/ql/src/test/results/clientpositive/spark/mapjoin_subquery2.q.out +++ b/ql/src/test/results/clientpositive/spark/mapjoin_subquery2.q.out @@ -92,7 +92,7 @@ STAGE PLANS: Spark #### A masked 
pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: x @@ -131,7 +131,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: y @@ -150,7 +150,7 @@ STAGE PLANS: 1 id (type: int) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: _col6 (type: int), _col5 (type: string), _col0 (type: int), _col1 (type: string) diff --git a/ql/src/test/results/clientpositive/spark/mapjoin_test_outer.q.out b/ql/src/test/results/clientpositive/spark/mapjoin_test_outer.q.out index 4bc10b5..a8cc26e 100644 --- a/ql/src/test/results/clientpositive/spark/mapjoin_test_outer.q.out +++ b/ql/src/test/results/clientpositive/spark/mapjoin_test_outer.q.out @@ -253,16 +253,16 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: src3 - Statistics: Num rows: 9 Data size: 40 Basic stats: COMPLETE Column stats: NONE + alias: src1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {key} {value} + 0 {value} 1 {key} {value} - 2 {value} + 2 {key} {value} keys: 0 key (type: string) 1 key (type: string) @@ -272,13 +272,13 @@ STAGE PLANS: Map 4 Map Operator Tree: TableScan - alias: src1 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: src3 + Statistics: Num rows: 9 Data size: 40 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {value} + 0 {key} {value} 1 {key} {value} - 2 {key} {value} + 2 {value} keys: 0 key (type: string) 1 key (type: string) @@ -289,10 +289,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (SORT, 3) + Reducer 3 <- Map 2 (SORT, 3) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: src2 @@ -311,8 +311,8 @@ STAGE PLANS: 2 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: - 0 Map 4 - 2 Map 3 + 0 Map 1 + 2 Map 4 Statistics: Num rows: 55 Data size: 420 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) @@ -324,7 +324,7 @@ STAGE PLANS: Statistics: Num rows: 55 Data size: 420 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Reducer 2 + Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) @@ -1095,16 +1095,16 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: src3 - Statistics: Num rows: 9 Data size: 40 Basic stats: COMPLETE Column stats: NONE + alias: src1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {key} {value} + 0 {value} 1 {key} {value} - 2 {value} + 2 {key} {value} keys: 0 key (type: string) 1 key (type: string) @@ -1114,13 +1114,13 @@ STAGE PLANS: Map 4 Map Operator Tree: TableScan - alias: src1 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + 
alias: src3 + Statistics: Num rows: 9 Data size: 40 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {value} + 0 {key} {value} 1 {key} {value} - 2 {key} {value} + 2 {value} keys: 0 key (type: string) 1 key (type: string) @@ -1131,10 +1131,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (SORT, 3) + Reducer 3 <- Map 2 (SORT, 3) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: src2 @@ -1153,8 +1153,8 @@ STAGE PLANS: 2 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: - 0 Map 4 - 2 Map 3 + 0 Map 1 + 2 Map 4 Statistics: Num rows: 55 Data size: 420 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) @@ -1166,7 +1166,7 @@ STAGE PLANS: Statistics: Num rows: 55 Data size: 420 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Reducer 2 + Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) diff --git a/ql/src/test/results/clientpositive/spark/mergejoins.q.out b/ql/src/test/results/clientpositive/spark/mergejoins.q.out index d47c454..0ab5602 100644 --- a/ql/src/test/results/clientpositive/spark/mergejoins.q.out +++ b/ql/src/test/results/clientpositive/spark/mergejoins.q.out @@ -52,10 +52,10 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: d + alias: b Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator predicate: val1 is not null (type: boolean) @@ -63,9 +63,9 @@ STAGE PLANS: Spark HashTable Sink Operator condition expressions: 0 {val1} {val2} - 1 {val1} {val2} + 1 {val2} 2 {val1} {val2} - 3 {val2} + 3 {val1} {val2} keys: 0 val1 (type: int) 1 val1 (type: int) @@ -73,27 +73,31 @@ STAGE PLANS: 3 val1 (type: int) Local Work: Map Reduce Local Work - Map 2 + Map 3 Map Operator Tree: TableScan - alias: e + alias: c Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: val2 is not null (type: boolean) + predicate: val1 is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {_col0} {_col1} {_col5} {_col6} {_col10} {_col11} {_col15} {_col16} - 1 {val1} + 0 {val1} {val2} + 1 {val1} {val2} + 2 {val2} + 3 {val1} {val2} keys: - 0 _col1 (type: int) - 1 val2 (type: int) + 0 val1 (type: int) + 1 val1 (type: int) + 2 val1 (type: int) + 3 val1 (type: int) Local Work: Map Reduce Local Work - Map 3 + Map 4 Map Operator Tree: TableScan - alias: b + alias: d Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator predicate: val1 is not null (type: boolean) @@ -101,9 +105,9 @@ STAGE PLANS: Spark HashTable Sink Operator condition expressions: 0 {val1} {val2} - 1 {val2} + 1 {val1} {val2} 2 {val1} {val2} - 3 {val1} {val2} + 3 {val2} keys: 0 val1 (type: int) 1 val1 (type: int) @@ -111,25 +115,21 @@ STAGE PLANS: 3 val1 (type: int) Local Work: Map Reduce Local Work - Map 4 + Map 5 Map Operator Tree: TableScan - alias: c + alias: e Statistics: Num rows: 0 Data size: 0 Basic stats: NONE 
Column stats: NONE Filter Operator - predicate: val1 is not null (type: boolean) + predicate: val2 is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {val1} {val2} - 1 {val1} {val2} - 2 {val2} - 3 {val1} {val2} + 0 {_col0} {_col1} {_col5} {_col6} {_col10} {_col11} {_col15} {_col16} + 1 {val1} keys: - 0 val1 (type: int) - 1 val1 (type: int) - 2 val1 (type: int) - 3 val1 (type: int) + 0 _col1 (type: int) + 1 val2 (type: int) Local Work: Map Reduce Local Work @@ -137,7 +137,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 5 + Map 1 Map Operator Tree: TableScan alias: a @@ -162,9 +162,9 @@ STAGE PLANS: 3 val1 (type: int) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 input vertices: - 1 Map 3 - 2 Map 4 - 3 Map 1 + 1 Map 2 + 2 Map 3 + 3 Map 4 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Map Join Operator condition map: @@ -177,7 +177,7 @@ STAGE PLANS: 1 val2 (type: int) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16, _col20, _col21 input vertices: - 1 Map 2 + 1 Map 5 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int), _col15 (type: int), _col16 (type: int), _col20 (type: int), _col21 (type: int) @@ -215,7 +215,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -235,7 +235,7 @@ STAGE PLANS: 2 key (type: string) Local Work: Map Reduce Local Work - Map 2 + Map 3 Map Operator Tree: TableScan alias: c @@ -260,7 +260,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: a @@ -283,8 +283,8 @@ STAGE PLANS: 2 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: - 1 Map 1 - 2 Map 2 + 1 Map 2 + 2 Map 3 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) diff --git a/ql/src/test/results/clientpositive/spark/mergejoins_mixed.q.out b/ql/src/test/results/clientpositive/spark/mergejoins_mixed.q.out index ec8adc4..8575c5b 100644 --- a/ql/src/test/results/clientpositive/spark/mergejoins_mixed.q.out +++ b/ql/src/test/results/clientpositive/spark/mergejoins_mixed.q.out @@ -28,17 +28,17 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: d + alias: b Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: 0 {key} {value} - 1 {key} {value} + 1 {value} 2 {key} {value} - 3 {value} + 3 {key} {value} keys: 0 key (type: string) 1 key (type: string) @@ -46,16 +46,16 @@ STAGE PLANS: 3 key (type: string) Local Work: Map Reduce Local Work - Map 2 + Map 3 Map Operator Tree: TableScan - alias: b + alias: c Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: 0 {key} {value} - 1 {value} - 2 {key} {value} + 1 {key} {value} + 2 {value} 3 {key} {value} keys: 0 key (type: string) @@ -64,17 +64,17 @@ STAGE PLANS: 3 key (type: string) Local Work: Map Reduce 
Local Work - Map 3 + Map 4 Map Operator Tree: TableScan - alias: c + alias: d Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: 0 {key} {value} 1 {key} {value} - 2 {value} - 3 {key} {value} + 2 {key} {value} + 3 {value} keys: 0 key (type: string) 1 key (type: string) @@ -87,7 +87,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 4 + Map 1 Map Operator Tree: TableScan alias: a @@ -111,7 +111,7 @@ STAGE PLANS: input vertices: 1 Map 2 2 Map 3 - 3 Map 1 + 3 Map 4 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) @@ -149,15 +149,15 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {key} {value} - 1 {value} + 0 {value} + 1 {key} {value} 2 {key} {value} 3 {key} {value} keys: @@ -167,16 +167,16 @@ STAGE PLANS: 3 key (type: string) Local Work: Map Reduce Local Work - Map 3 + Map 2 Map Operator Tree: TableScan - alias: c + alias: b Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: 0 {key} {value} - 1 {key} {value} - 2 {value} + 1 {value} + 2 {key} {value} 3 {key} {value} keys: 0 key (type: string) @@ -185,16 +185,16 @@ STAGE PLANS: 3 key (type: string) Local Work: Map Reduce Local Work - Map 4 + Map 3 Map Operator Tree: TableScan - alias: a + alias: c Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {value} + 0 {key} {value} 1 {key} {value} - 2 {key} {value} + 2 {value} 3 {key} {value} keys: 0 key (type: string) @@ -208,7 +208,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: d @@ -230,7 +230,7 @@ STAGE PLANS: 3 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 input vertices: - 0 Map 4 + 0 Map 1 1 Map 2 2 Map 3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE @@ -273,14 +273,14 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: d + alias: a Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {key} {value} + 0 {value} 1 {key} {value} 2 {key} {value} - 3 {value} + 3 {key} {value} keys: 0 key (type: string) 1 key (type: string) @@ -309,14 +309,14 @@ STAGE PLANS: Map 4 Map Operator Tree: TableScan - alias: a + alias: d Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {value} + 0 {key} {value} 1 {key} {value} 2 {key} {value} - 3 {key} {value} + 3 {value} keys: 0 key (type: string) 1 key (type: string) @@ -351,9 +351,9 @@ STAGE PLANS: 3 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 input vertices: - 0 Map 4 + 0 Map 1 1 Map 2 - 3 Map 1 + 3 Map 4 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: 
string), _col11 (type: string), _col15 (type: string), _col16 (type: string) @@ -391,15 +391,15 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {key} {value} - 1 {value} + 0 {value} + 1 {key} {value} 2 {key} {value} 3 {key} {value} keys: @@ -409,16 +409,16 @@ STAGE PLANS: 3 key (type: string) Local Work: Map Reduce Local Work - Map 3 + Map 2 Map Operator Tree: TableScan - alias: c + alias: b Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: 0 {key} {value} - 1 {key} {value} - 2 {value} + 1 {value} + 2 {key} {value} 3 {key} {value} keys: 0 key (type: string) @@ -427,16 +427,16 @@ STAGE PLANS: 3 key (type: string) Local Work: Map Reduce Local Work - Map 4 + Map 3 Map Operator Tree: TableScan - alias: a + alias: c Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {value} + 0 {key} {value} 1 {key} {value} - 2 {key} {value} + 2 {value} 3 {key} {value} keys: 0 key (type: string) @@ -450,7 +450,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: d @@ -472,7 +472,7 @@ STAGE PLANS: 3 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 input vertices: - 0 Map 4 + 0 Map 1 1 Map 2 2 Map 3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE @@ -514,50 +514,50 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: d + alias: b Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: 0 {key} {value} - 1 {key} {value} - 2 {value} + 1 {value} + 2 {key} {value} keys: 0 key (type: string) 1 key (type: string) 2 key (type: string) Local Work: Map Reduce Local Work - Map 2 + Map 3 Map Operator Tree: TableScan - alias: b + alias: c Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {key} {value} + 0 {_col0} {_col1} {_col5} {_col6} {_col10} {_col11} 1 {value} - 2 {key} {value} keys: - 0 key (type: string) + 0 _col6 (type: string) 1 key (type: string) - 2 key (type: string) Local Work: Map Reduce Local Work - Map 3 + Map 4 Map Operator Tree: TableScan - alias: c + alias: d Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {_col0} {_col1} {_col5} {_col6} {_col10} {_col11} - 1 {value} + 0 {key} {value} + 1 {key} {value} + 2 {value} keys: - 0 _col6 (type: string) + 0 key (type: string) 1 key (type: string) + 2 key (type: string) Local Work: Map Reduce Local Work @@ -565,7 +565,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 4 + Map 1 Map Operator Tree: TableScan alias: a @@ -585,7 +585,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: 1 Map 2 - 2 Map 1 + 2 Map 4 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Map Join Operator condition map: @@ -637,15 +637,15 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data 
size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {key} {value} - 1 {value} + 0 {value} + 1 {key} {value} 2 {key} {value} keys: 0 key (type: string) @@ -653,15 +653,15 @@ STAGE PLANS: 2 key (type: string) Local Work: Map Reduce Local Work - Map 4 + Map 2 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {value} - 1 {key} {value} + 0 {key} {value} + 1 {value} 2 {key} {value} keys: 0 key (type: string) @@ -674,7 +674,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: d @@ -693,7 +693,7 @@ STAGE PLANS: 2 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: - 0 Map 4 + 0 Map 1 1 Map 2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator @@ -726,7 +726,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 input vertices: - 0 Map 1 + 0 Map 4 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col15 (type: string), _col16 (type: string), _col10 (type: string), _col11 (type: string) @@ -769,7 +769,7 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: d + alias: a Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -802,7 +802,7 @@ STAGE PLANS: Map 6 Map Operator Tree: TableScan - alias: a + alias: d Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -911,7 +911,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 4 + Map 1 Map Operator Tree: TableScan alias: a @@ -959,7 +959,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: d @@ -975,7 +975,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 input vertices: - 0 Map 4 + 0 Map 1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) @@ -1054,17 +1054,6 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: d - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: value (type: string) - Map 5 - Map Operator Tree: - TableScan alias: a Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator @@ -1104,6 +1093,17 @@ STAGE PLANS: value expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) Local Work: Map Reduce Local Work + Map 5 + Map Operator Tree: + TableScan + alias: d + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + 
sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) Reducer 2 Reduce Operator Tree: Join Operator @@ -1174,20 +1174,6 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: d - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} {_col5} {_col6} {_col10} {_col11} - 1 {value} - keys: - 0 _col0 (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - Map 4 - Map Operator Tree: - TableScan alias: a Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator @@ -1215,6 +1201,20 @@ STAGE PLANS: 1 key (type: string) Local Work: Map Reduce Local Work + Map 4 + Map Operator Tree: + TableScan + alias: d + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} {_col5} {_col6} {_col10} {_col11} + 1 {value} + keys: + 0 _col0 (type: string) + 1 key (type: string) + Local Work: + Map Reduce Local Work Stage: Stage-1 Spark @@ -1236,7 +1236,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: - 0 Map 4 + 0 Map 1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Map Join Operator condition map: @@ -1249,7 +1249,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 input vertices: - 1 Map 1 + 1 Map 4 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) @@ -1288,7 +1288,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 2 Map Operator Tree: TableScan alias: b @@ -1310,7 +1310,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 5 + Map 1 Map Operator Tree: TableScan alias: a @@ -1329,7 +1329,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 1 Map 3 + 1 Map 2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: @@ -1344,21 +1344,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: d - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: value (type: string) - Map 4 + Map 3 Map Operator Tree: TableScan alias: c @@ -1374,7 +1363,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: - 0 Map 5 + 0 Map 1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) @@ -1384,7 +1373,18 @@ STAGE PLANS: value expressions: _col1 (type: string), _col5 (type: string), 
_col6 (type: string), _col10 (type: string), _col11 (type: string) Local Work: Map Reduce Local Work - Reducer 2 + Map 5 + Map Operator Tree: + TableScan + alias: d + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Reducer 4 Reduce Operator Tree: Join Operator condition map: @@ -1430,22 +1430,6 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: d - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} {_col5} {_col6} - 1 {key} {value} - 2 {value} - keys: - 0 _col6 (type: string) - 1 key (type: string) - 2 key (type: string) - Local Work: - Map Reduce Local Work Map 2 Map Operator Tree: TableScan @@ -1479,12 +1463,28 @@ STAGE PLANS: 2 key (type: string) Local Work: Map Reduce Local Work + Map 4 + Map Operator Tree: + TableScan + alias: d + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} {_col5} {_col6} + 1 {key} {value} + 2 {value} + keys: + 0 _col6 (type: string) + 1 key (type: string) + 2 key (type: string) + Local Work: + Map Reduce Local Work Stage: Stage-1 Spark #### A masked pattern was here #### Vertices: - Map 4 + Map 1 Map Operator Tree: TableScan alias: a @@ -1520,7 +1520,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 input vertices: 1 Map 3 - 2 Map 1 + 2 Map 4 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) diff --git a/ql/src/test/results/clientpositive/spark/multi_insert.q.out b/ql/src/test/results/clientpositive/spark/multi_insert.q.out index 941c430..0a38bea 100644 --- a/ql/src/test/results/clientpositive/spark/multi_insert.q.out +++ b/ql/src/test/results/clientpositive/spark/multi_insert.q.out @@ -598,15 +598,11 @@ STAGE PLANS: Filter Operator predicate: ((key < 10) or ((key > 10) and (key < 20))) (type: boolean) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Forward @@ -619,18 +615,14 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false 
Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -639,18 +631,14 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 Stage: Stage-0 Move Operator @@ -756,15 +744,11 @@ STAGE PLANS: Filter Operator predicate: ((key < 10) or ((key > 10) and (key < 20))) (type: boolean) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Forward @@ -777,18 +761,14 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -797,18 +777,14 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 Stage: Stage-0 Move Operator @@ -914,15 +890,11 @@ STAGE PLANS: Filter Operator predicate: ((key < 10) or ((key > 10) and (key < 20))) (type: boolean) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Forward @@ -935,18 +907,14 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -955,18 +923,14 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), 
_col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 Stage: Stage-0 Move Operator @@ -1072,15 +1036,11 @@ STAGE PLANS: Filter Operator predicate: ((key < 10) or ((key > 10) and (key < 20))) (type: boolean) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Forward @@ -1093,18 +1053,14 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -1113,18 +1069,14 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: 
default.src_multi2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 Stage: Stage-0 Move Operator @@ -1231,28 +1183,22 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Filter Operator predicate: (_col0 < 10) (type: boolean) - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 Map 3 Map Operator Tree: TableScan @@ -1262,28 +1208,22 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Filter Operator predicate: (_col0 < 10) (type: boolean) - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 Union 2 Vertex: Union 2 @@ -1418,28 +1358,22 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Filter Operator predicate: (_col0 < 10) (type: boolean) - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - 
outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 Map 3 Map Operator Tree: TableScan @@ -1449,28 +1383,22 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Filter Operator predicate: (_col0 < 10) (type: boolean) - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 Union 2 Vertex: Union 2 @@ -1605,28 +1533,22 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Filter Operator predicate: (_col0 < 10) (type: boolean) - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 Map 3 Map Operator Tree: TableScan @@ -1636,28 +1558,22 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Filter Operator predicate: (_col0 < 10) (type: boolean) - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 Union 2 Vertex: Union 2 @@ -1792,28 +1708,22 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Filter Operator predicate: (_col0 < 10) (type: boolean) - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 Map 3 Map Operator Tree: TableScan @@ -1823,28 +1733,22 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Filter Operator predicate: (_col0 < 10) (type: boolean) - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 Union 2 Vertex: Union 2 diff --git a/ql/src/test/results/clientpositive/spark/multi_insert_gby.q.out b/ql/src/test/results/clientpositive/spark/multi_insert_gby.q.out index 5bbfe19..ef3c50c 100644 --- a/ql/src/test/results/clientpositive/spark/multi_insert_gby.q.out +++ b/ql/src/test/results/clientpositive/spark/multi_insert_gby.q.out @@ -54,15 +54,11 @@ STAGE PLANS: Filter Operator predicate: ((key > 450) or (key > 500)) (type: boolean) Statistics: Num rows: 332 Data size: 3527 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) Statistics: Num rows: 332 Data size: 3527 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 332 Data size: 3527 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Forward diff --git a/ql/src/test/results/clientpositive/spark/multi_insert_gby2.q.out b/ql/src/test/results/clientpositive/spark/multi_insert_gby2.q.out index bd2b5ae..965c5a1 100644 --- a/ql/src/test/results/clientpositive/spark/multi_insert_gby2.q.out +++ b/ql/src/test/results/clientpositive/spark/multi_insert_gby2.q.out @@ -95,18 +95,14 @@ STAGE PLANS: mode: complete outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: double) - 
outputColumnNames: _col0 + File Output Operator + compressed: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.e2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.e2 Stage: Stage-0 Move Operator diff --git a/ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out b/ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out index fe29a5a..57a0eb5 100644 --- a/ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out +++ b/ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out @@ -1774,38 +1774,30 @@ STAGE PLANS: expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: double), VALUE._col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: double), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col2 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) - Select Operator - expressions: _col0 (type: string), _col1 (type: double), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: string), _col2 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(DISTINCT _col1) - keys: _col0 (type: string), _col1 (type: double), _col2 (type: string) - mode: complete - outputColumnNames: _col0, _col1, _col2, _col3 + value expressions: _col1 (type: double) + Group By Operator + aggregations: count(DISTINCT _col1) + keys: _col0 (type: string), _col1 (type: double), _col2 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToDouble(_col3) (type: double) + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), UDFToDouble(_col3) (type: double) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.e3 + table: + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.e3 Reducer 3 Reduce Operator Tree: Forward diff --git a/ql/src/test/results/clientpositive/spark/multi_insert_lateral_view.q.out b/ql/src/test/results/clientpositive/spark/multi_insert_lateral_view.q.out index 9c19cdd..c26ce5b 100644 --- a/ql/src/test/results/clientpositive/spark/multi_insert_lateral_view.q.out +++ b/ql/src/test/results/clientpositive/spark/multi_insert_lateral_view.q.out @@ -309,22 +309,18 @@ STAGE PLANS: Lateral View Join Operator outputColumnNames: _col0, _col5 Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col5 (type: double) - outputColumnNames: _col0, _col5 + Group By Operator + aggregations: sum(_col5) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col5) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) + value expressions: _col1 (type: double) Select Operator expressions: array((key + 1),(key + 2)) (type: array<double>) outputColumnNames: _col0 @@ -335,22 +331,18 @@ STAGE PLANS: Lateral View Join Operator outputColumnNames: _col0, _col5 Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col5 (type: double) - outputColumnNames: _col0, _col5 + Group By Operator + aggregations: sum(_col5) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col5) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) + value expressions: _col1 (type: double) Map 5 Map Operator Tree: TableScan @@ -365,22 +357,18 @@ STAGE PLANS: Lateral View Join Operator outputColumnNames: _col0, _col5 Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col5 (type: double) - outputColumnNames: _col0, _col5 + Group By Operator + aggregations: sum(_col5) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col5) - keys: _col0 (type: string) - mode: hash -
outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) + value expressions: _col1 (type: double) Select Operator expressions: array((key + 3),(key + 4)) (type: array<double>) outputColumnNames: _col0 @@ -391,22 +379,18 @@ STAGE PLANS: Lateral View Join Operator outputColumnNames: _col0, _col5 Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col5 (type: double) - outputColumnNames: _col0, _col5 + Group By Operator + aggregations: sum(_col5) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col5) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) + value expressions: _col1 (type: double) Reducer 2 Reduce Operator Tree: Group By Operator @@ -415,18 +399,14 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: double) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_lv1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv1 Reducer 3 Reduce Operator Tree: Group By Operator @@ -435,18 +415,14 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: double) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_lv2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv2 Stage: Stage-0 Move Operator @@ -581,22 +557,18 @@ STAGE PLANS: Lateral View Join Operator outputColumnNames: _col0, _col5 Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col5 (type: double) - outputColumnNames: _col0, _col5 + Group By Operator + aggregations: sum(_col5) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col5) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) + value expressions: _col1 (type: double) Select Operator expressions: array((key + 1),(key + 2)) (type: array<double>) outputColumnNames: _col0 @@ -607,22 +579,18 @@ STAGE PLANS: Lateral View Join Operator outputColumnNames: _col0, _col5 Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col5 (type: double) - outputColumnNames: _col0, _col5 + Group By Operator + aggregations: sum(_col5) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col5) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) + value expressions: _col1 (type: double) Map 5 Map Operator Tree: TableScan @@ -631,16 +599,12 @@ STAGE PLANS: Filter Operator predicate: ((key > 200) or (key < 200)) (type: boolean) Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + value expressions: value (type: string) Reducer 2 Reduce Operator Tree: Group By
Operator @@ -649,18 +613,14 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: double) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_lv1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv1 Reducer 3 Reduce Operator Tree: Forward @@ -674,18 +634,14 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_lv2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv2 Filter Operator predicate: (KEY._col0 < 200) (type: boolean) Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE @@ -695,18 +651,14 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_lv3 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv3 Stage: Stage-0 Move Operator @@ -995,18 +947,14 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: double), _col1 (type: double) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - File Output 
Operator - compressed: false - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_lv1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv1 Reducer 3 Reduce Operator Tree: Group By Operator @@ -1015,18 +963,14 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: double), _col1 (type: double) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_lv2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv2 Reducer 4 Reduce Operator Tree: Group By Operator @@ -1035,18 +979,14 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: double) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_lv3 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv3 Stage: Stage-0 Move Operator @@ -1243,21 +1183,17 @@ STAGE PLANS: Lateral View Join Operator outputColumnNames: _col0, _col5 Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col5 (type: double) - outputColumnNames: _col0, _col5 + Group By Operator + aggregations: sum(DISTINCT _col5) + keys: _col0 (type: string), _col5 (type: double) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(DISTINCT _col5) - keys: _col0 (type: string), _col5 (type: double) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: double) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 20 Data size: 208 Basic stats: 
COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: double) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: array((key + 1),(key + 2)) (type: array<double>) outputColumnNames: _col0 @@ -1268,21 +1204,17 @@ STAGE PLANS: Lateral View Join Operator outputColumnNames: _col0, _col5 Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col5 (type: double) - outputColumnNames: _col0, _col5 + Group By Operator + aggregations: sum(DISTINCT _col5) + keys: _col0 (type: string), _col5 (type: double) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(DISTINCT _col5) - keys: _col0 (type: string), _col5 (type: double) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: double) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: double) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Map 6 Map Operator Tree: TableScan @@ -1297,21 +1229,17 @@ STAGE PLANS: Lateral View Join Operator outputColumnNames: _col0, _col5 Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col5 (type: double) - outputColumnNames: _col0, _col5 + Group By Operator + aggregations: sum(DISTINCT _col5) + keys: _col0 (type: string), _col5 (type: double) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(DISTINCT _col5) - keys: _col0 (type: string), _col5 (type: double) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: double) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: double) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: array((key + 3),(key + 4)) (type: array<double>) outputColumnNames: _col0 @@ -1322,21 +1250,17 @@ STAGE PLANS: Lateral View Join Operator outputColumnNames: _col0, _col5 Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col5 (type: double) - outputColumnNames: _col0, _col5 + Group By Operator + aggregations: sum(DISTINCT _col5) + keys: _col0 (type: string), _col5 (type: double) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(DISTINCT _col5) - keys: _col0 (type: string), _col5 (type: double) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key 
expressions: _col0 (type: string), _col1 (type: double) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: double) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Map 7 Map Operator Tree: TableScan @@ -1345,15 +1269,11 @@ STAGE PLANS: Filter Operator predicate: ((key > 200) or (key < 200)) (type: boolean) Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Reduce Output Operator + key expressions: value (type: string), key (type: string) + sort order: ++ + Map-reduce partition columns: value (type: string) Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string), key (type: string) - sort order: ++ - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Group By Operator @@ -1362,18 +1282,14 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: double) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_lv1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv1 Reducer 3 Reduce Operator Tree: Group By Operator @@ -1382,18 +1298,14 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: double) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_lv2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv2 Reducer 4 Reduce Operator Tree: Forward @@ -1407,18 +1319,14 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 
(type: string), _col1 (type: double) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_lv3 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv3 Filter Operator predicate: (KEY._col1:0._col0 < 200) (type: boolean) Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE @@ -1428,18 +1336,14 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: double) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_lv4 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv4 Stage: Stage-0 Move Operator diff --git a/ql/src/test/results/clientpositive/spark/multi_insert_mixed.q.out b/ql/src/test/results/clientpositive/spark/multi_insert_mixed.q.out index 7df0f39..e89864a 100644 --- a/ql/src/test/results/clientpositive/spark/multi_insert_mixed.q.out +++ b/ql/src/test/results/clientpositive/spark/multi_insert_mixed.q.out @@ -123,25 +123,21 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 250 Data size: 2656 Basic 
stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -154,25 +150,21 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reducer 5 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git a/ql/src/test/results/clientpositive/spark/multi_insert_move_tasks_share_dependencies.q.out b/ql/src/test/results/clientpositive/spark/multi_insert_move_tasks_share_dependencies.q.out index ea17fa1..639f4bd 100644 --- a/ql/src/test/results/clientpositive/spark/multi_insert_move_tasks_share_dependencies.q.out +++ b/ql/src/test/results/clientpositive/spark/multi_insert_move_tasks_share_dependencies.q.out @@ -615,15 +615,11 @@ STAGE PLANS: Filter Operator predicate: ((key < 10) or ((key > 10) and (key < 20))) (type: boolean) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Forward @@ -636,18 +632,14 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - 
Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -656,18 +648,14 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 Stage: Stage-3 Dependency Collection @@ -777,15 +765,11 @@ STAGE PLANS: Filter Operator predicate: ((key < 10) or ((key > 10) and (key < 20))) (type: boolean) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Forward @@ -798,18 +782,14 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: 
default.src_multi1 Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -818,18 +798,14 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 Stage: Stage-3 Dependency Collection @@ -939,15 +915,11 @@ STAGE PLANS: Filter Operator predicate: ((key < 10) or ((key > 10) and (key < 20))) (type: boolean) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Forward @@ -960,18 +932,14 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -980,18 +948,14 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 
12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 Stage: Stage-3 Dependency Collection @@ -1101,15 +1065,11 @@ STAGE PLANS: Filter Operator predicate: ((key < 10) or ((key > 10) and (key < 20))) (type: boolean) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Forward @@ -1122,18 +1082,14 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -1142,18 +1098,14 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 Stage: Stage-3 Dependency Collection @@ -1264,28 +1216,22 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Filter Operator predicate: (_col0 < 10) (type: boolean) - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 Map 3 Map Operator Tree: TableScan @@ -1295,28 +1241,22 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Filter Operator predicate: (_col0 < 10) (type: boolean) - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 Union 2 Vertex: Union 2 @@ -1455,28 +1395,22 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Filter Operator predicate: (_col0 < 10) (type: boolean) - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input 
format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 Map 3 Map Operator Tree: TableScan @@ -1486,28 +1420,22 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Filter Operator predicate: (_col0 < 10) (type: boolean) - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 Union 2 Vertex: Union 2 @@ -1646,28 +1574,22 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Filter Operator predicate: (_col0 < 10) (type: boolean) - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: 
default.src_multi1 Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 Map 3 Map Operator Tree: TableScan @@ -1677,28 +1599,22 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Filter Operator predicate: (_col0 < 10) (type: boolean) - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 Union 2 Vertex: Union 2 @@ -1837,28 +1753,22 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Filter Operator predicate: (_col0 < 10) (type: boolean) - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + 
File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 Map 3 Map Operator Tree: TableScan @@ -1868,28 +1778,22 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Filter Operator predicate: (_col0 < 10) (type: boolean) - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi1 Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 Union 2 Vertex: Union 2 @@ -2431,24 +2335,20 @@ STAGE PLANS: Filter Operator predicate: ((key < 10) or ((key > 10) and (key < 20))) (type: boolean) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2458,10 +2358,10 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE 
Column stats: NONE + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2477,17 +2377,13 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reducer 6 Reduce Operator Tree: Forward @@ -2499,17 +2395,13 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Stage: Stage-3 Dependency Collection @@ -2570,24 +2462,20 @@ STAGE PLANS: Filter Operator predicate: ((key < 10) or ((key > 10) and (key < 20))) (type: boolean) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - 
Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2597,10 +2485,10 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2616,17 +2504,13 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reducer 6 Reduce Operator Tree: Forward @@ -2638,17 +2522,13 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Stage: Stage-3 Dependency Collection @@ -2709,24 +2589,20 @@ STAGE PLANS: Filter Operator predicate: ((key < 10) or ((key > 10) and (key < 20))) (type: boolean) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value 
(type: string) - outputColumnNames: key, value + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2736,10 +2612,10 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2755,17 +2631,13 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reducer 6 Reduce Operator Tree: Forward @@ -2777,17 +2649,13 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 12 Data size: 127 Basic 
stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Stage: Stage-3 Dependency Collection @@ -2848,24 +2716,20 @@ STAGE PLANS: Filter Operator predicate: ((key < 10) or ((key > 10) and (key < 20))) (type: boolean) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2875,10 +2739,10 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2894,17 +2758,13 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 73 Data size: 775 
Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reducer 6 Reduce Operator Tree: Forward @@ -2916,17 +2776,13 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Stage: Stage-3 Dependency Collection @@ -3025,24 +2881,20 @@ STAGE PLANS: Filter Operator predicate: ((key < 10) or ((key > 10) and (key < 20))) (type: boolean) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3052,10 +2904,10 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3071,17 +2923,13 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column 
stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reducer 6 Reduce Operator Tree: Forward @@ -3093,17 +2941,13 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Stage: Stage-5 Dependency Collection @@ -3275,24 +3119,20 @@ STAGE PLANS: Filter Operator predicate: ((key < 10) or ((key > 10) and (key < 20))) (type: boolean) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3302,10 +3142,10 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1 
- Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3321,17 +3161,13 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reducer 6 Reduce Operator Tree: Forward @@ -3343,17 +3179,13 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Stage: Stage-5 Dependency Collection @@ -3525,24 +3357,20 @@ STAGE PLANS: Filter Operator predicate: ((key < 10) or ((key > 10) and (key < 20))) (type: boolean) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), 
VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3552,10 +3380,10 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3571,17 +3399,13 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reducer 6 Reduce Operator Tree: Forward @@ -3593,17 +3417,13 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Stage: Stage-5 Dependency Collection @@ -3775,24 +3595,20 @@ STAGE PLANS: Filter Operator predicate: ((key < 10) or ((key > 10) and (key < 20))) (type: boolean) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - 
Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3802,10 +3618,10 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3821,17 +3637,13 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reducer 6 Reduce Operator Tree: Forward @@ -3843,17 +3655,13 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: 
string) - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Stage: Stage-5 Dependency Collection diff --git a/ql/src/test/results/clientpositive/spark/multi_join_union.q.out b/ql/src/test/results/clientpositive/spark/multi_join_union.q.out index e0fe067..d8dc110 100644 --- a/ql/src/test/results/clientpositive/spark/multi_join_union.q.out +++ b/ql/src/test/results/clientpositive/spark/multi_join_union.q.out @@ -62,7 +62,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 6 Map Operator Tree: TableScan alias: b @@ -83,11 +83,25 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 4 <- Map 5 (PARTITION-LEVEL SORT, 3), Union 3 (PARTITION-LEVEL SORT, 3) - Union 3 <- Map 2 (NONE, 0), Map 6 (NONE, 0) + Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 3), Union 2 (PARTITION-LEVEL SORT, 3) + Union 2 <- Map 1 (NONE, 0), Map 4 (NONE, 0) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 + Map Operator Tree: + TableScan + alias: src13 + Filter Operator + predicate: value is not null (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + value expressions: _col0 (type: string) + Map 4 Map Operator Tree: TableScan alias: src14 @@ -96,14 +110,11 @@ STAGE PLANS: Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - value expressions: _col0 (type: string) + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + value expressions: _col0 (type: string) Map 5 Map Operator Tree: TableScan @@ -123,7 +134,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 1 Map 1 + 1 Map 6 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col6 (type: string) @@ -133,24 +144,7 @@ STAGE PLANS: value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) Local Work: Map Reduce Local Work - Map 6 - Map Operator Tree: - TableScan - alias: src13 - Filter Operator - predicate: value is not null (type: boolean) - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - value expressions: _col0 (type: string) - Reducer 4 + Reducer 3 Reduce Operator Tree: Join Operator condition map: @@ -171,8 +165,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 3 - Vertex: Union 3 + Union 2 + Vertex: Union 2 Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out b/ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out index 1cd3952..d6e82fb 100644 --- a/ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out +++ b/ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out @@ -663,6 +663,76 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: false (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + aggregations: count(key) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Path -> Alias: + -mr-10003default.src{} [src] + Path -> Partition: + -mr-10003default.src{} + Partition + base file name: src + input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.NullStructSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.NullStructSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + -mr-10003default.src{} [src] + Map 4 + Map Operator Tree: + TableScan alias: srcpart Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE GatherStats: false @@ -873,80 +943,6 @@ STAGE PLANS: /srcpart/ds=2008-04-08/hr=12 [srcpart] /srcpart/ds=2008-04-09/hr=11 [srcpart] /srcpart/ds=2008-04-09/hr=12 [srcpart] - Map 4 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: false (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - 
aggregations: count(key) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Path -> Alias: - -mr-10003default.src{} [src] - Path -> Partition: - -mr-10003default.src{} - Partition - base file name: src - input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments defaultdefault - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.NullStructSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.NullStructSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments defaultdefault - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated Path -> Alias: - -mr-10003default.src{} [src] Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -957,29 +953,26 @@ STAGE PLANS: Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: bigint) - outputColumnNames: _col0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types bigint - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Reducer 5 Needs Tagging: false Reduce Operator Tree: @@ -990,29 +983,26 @@ STAGE PLANS: Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 - 
Select Operator - expressions: _col0 (type: bigint) - outputColumnNames: _col0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types bigint - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Union 3 Vertex: Union 3 @@ -1040,8 +1030,8 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### -2000 0 +2000 PREHOOK: query: explain extended select * from (select key from src where false) a left outer join (select value from srcpart limit 0) b PREHOOK: type: QUERY @@ -1551,17 +1541,17 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Local Work: Map Reduce Local Work Stage: Stage-1 Spark Edges: - Union 3 <- Map 2 (NONE, 0), Map 4 (NONE, 0) + Union 2 <- Map 1 (NONE, 0), Map 3 (NONE, 0) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src @@ -1644,7 +1634,7 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src] - Map 4 + Map 3 Map Operator Tree: TableScan alias: src @@ -1663,34 +1653,31 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0 input vertices: - 1 Map 1 + 1 Map 4 Position of Big Table: 0 Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - File Output Operator - compressed: false - GlobalTableId: 0 + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 + NumFilesPerFileSink: 1 #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.nesting.levels 
true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -1742,8 +1729,8 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src] - Union 3 - Vertex: Union 3 + Union 2 + Vertex: Union 2 Stage: Stage-0 Fetch Operator @@ -1811,7 +1798,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: s2 @@ -1885,7 +1872,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: s1 @@ -1906,7 +1893,7 @@ STAGE PLANS: 1 value (type: string) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 1 Map 1 + 1 Map 2 Position of Big Table: 0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator diff --git a/ql/src/test/results/clientpositive/spark/outer_join_ppr.q.out b/ql/src/test/results/clientpositive/spark/outer_join_ppr.q.out index ad358ad..c6f88cc 100644 --- a/ql/src/test/results/clientpositive/spark/outer_join_ppr.q.out +++ b/ql/src/test/results/clientpositive/spark/outer_join_ppr.q.out @@ -118,28 +118,25 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string), ds (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -147,13 +144,11 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src numFiles 1 numRows 500 - partition_columns ds/hr - partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -163,29 +158,51 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src + name: default.src + Truncated Path -> Alias: + /src [a] + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string), ds (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=12 + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-08 - hr 12 + hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -226,12 +243,12 @@ STAGE PLANS: name: default.srcpart #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: hr=12 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-09 - hr 11 + ds 2008-04-08 + hr 12 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -272,12 +289,12 @@ STAGE PLANS: name: default.srcpart #### A masked pattern was here #### Partition - base file name: hr=12 + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-09 - hr 12 + hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -316,33 +333,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart - Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [b] - /srcpart/ds=2008-04-08/hr=12 [b] - /srcpart/ds=2008-04-09/hr=11 [b] - /srcpart/ds=2008-04-09/hr=12 [b] - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: hr=12 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 12 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -350,11 +348,13 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src + name default.srcpart numFiles 1 numRows 500 + partition_columns ds/hr + partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct src { string key, string value} + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -364,26 +364,26 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: - /src [a] + /srcpart/ds=2008-04-08/hr=11 [b] + /srcpart/ds=2008-04-08/hr=12 [b] + /srcpart/ds=2008-04-09/hr=11 [b] + /srcpart/ds=2008-04-09/hr=12 [b] Reducer 2 Needs Tagging: true Reduce Operator Tree: @@ -594,28 +594,25 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string), ds (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -623,13 +620,11 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src numFiles 1 numRows 500 - partition_columns ds/hr - partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -639,29 +634,51 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 #### A masked 
pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src + name: default.src + Truncated Path -> Alias: + /src [a] + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string), ds (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=12 + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-08 - hr 12 + hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -702,12 +719,12 @@ STAGE PLANS: name: default.srcpart #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: hr=12 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-09 - hr 11 + ds 2008-04-08 + hr 12 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -748,12 +765,12 @@ STAGE PLANS: name: default.srcpart #### A masked pattern was here #### Partition - base file name: hr=12 + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-09 - hr 12 + hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -792,33 +809,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart - Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [b] - /srcpart/ds=2008-04-08/hr=12 [b] - /srcpart/ds=2008-04-09/hr=11 [b] - /srcpart/ds=2008-04-09/hr=12 [b] - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: hr=12 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 12 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -826,11 +824,13 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src + name default.srcpart numFiles 1 numRows 500 + partition_columns ds/hr + partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct src { string key, string value} + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -840,26 +840,26 @@ STAGE 
PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: - /src [a] + /srcpart/ds=2008-04-08/hr=11 [b] + /srcpart/ds=2008-04-08/hr=12 [b] + /srcpart/ds=2008-04-09/hr=11 [b] + /srcpart/ds=2008-04-09/hr=12 [b] Reducer 2 Needs Tagging: true Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/spark/parallel.q.out b/ql/src/test/results/clientpositive/spark/parallel.q.out index 9ab3c73..4d054b9 100644 --- a/ql/src/test/results/clientpositive/spark/parallel.q.out +++ b/ql/src/test/results/clientpositive/spark/parallel.q.out @@ -68,54 +68,42 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: Forward - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_a + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 
Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_a Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_b + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_b Stage: Stage-0 Move Operator diff --git a/ql/src/test/results/clientpositive/spark/parallel_join0.q.out b/ql/src/test/results/clientpositive/spark/parallel_join0.q.out index b5af1b0..a23bc88 100644 --- a/ql/src/test/results/clientpositive/spark/parallel_join0.q.out +++ b/ql/src/test/results/clientpositive/spark/parallel_join0.q.out @@ -28,7 +28,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: src @@ -53,10 +53,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (SORT, 4) + Reducer 2 <- Map 1 (SORT, 4) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src @@ -79,19 +79,15 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3 input vertices: - 1 Map 1 + 1 Map 3 Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + sort order: ++++ Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - sort order: ++++ - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Reducer 3 + Reducer 2 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string) diff --git a/ql/src/test/results/clientpositive/spark/parallel_join1.q.out b/ql/src/test/results/clientpositive/spark/parallel_join1.q.out index 0af1327..3473f11 100644 --- a/ql/src/test/results/clientpositive/spark/parallel_join1.q.out +++ b/ql/src/test/results/clientpositive/spark/parallel_join1.q.out @@ -29,7 
+29,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: src2 @@ -51,7 +51,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src1 @@ -70,7 +70,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col6 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), _col6 (type: string) diff --git a/ql/src/test/results/clientpositive/spark/pcr.q.out b/ql/src/test/results/clientpositive/spark/pcr.q.out index c54bb99..eb00be3 100644 --- a/ql/src/test/results/clientpositive/spark/pcr.q.out +++ b/ql/src/test/results/clientpositive/spark/pcr.q.out @@ -2755,7 +2755,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: t2 @@ -2829,10 +2829,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (SORT, 1) + Reducer 2 <- Map 1 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: t1 @@ -2853,7 +2853,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1, _col6, _col7 input vertices: - 1 Map 1 + 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -2919,7 +2919,7 @@ STAGE PLANS: name: default.pcr_t1 Truncated Path -> Alias: /pcr_t1/ds=2000-04-08 [t1] - Reducer 3 + Reducer 2 Needs Tagging: false Reduce Operator Tree: Select Operator @@ -3067,7 +3067,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: t2 @@ -3141,10 +3141,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (SORT, 1) + Reducer 2 <- Map 1 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: t1 @@ -3165,7 +3165,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1, _col6, _col7 input vertices: - 1 Map 1 + 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -3231,7 +3231,7 @@ STAGE PLANS: name: default.pcr_t1 Truncated Path -> Alias: /pcr_t1/ds=2000-04-08 [t1] - Reducer 3 + Reducer 2 Needs Tagging: false Reduce Operator Tree: Select Operator diff --git a/ql/src/test/results/clientpositive/spark/ppd_gby_join.q.out b/ql/src/test/results/clientpositive/spark/ppd_gby_join.q.out index b1de2ed..56d6186 100644 --- a/ql/src/test/results/clientpositive/spark/ppd_gby_join.q.out +++ b/ql/src/test/results/clientpositive/spark/ppd_gby_join.q.out @@ -28,7 +28,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: src @@ -56,10 +56,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: src @@ -85,7 +85,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2 input vertices: - 0 Map 3 + 0 Map 1 Statistics: Num rows: 6 Data size: 69 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((((_col0 > '20') and ((_col1 < 'val_50') or (_col0 > '2'))) and ((_col2 > '50') or (_col0 < '50'))) and (_col2 <> '4')) (type: boolean) @@ -108,7 +108,7 @@ STAGE PLANS: value expressions: _col1 (type: 
bigint) Local Work: Map Reduce Local Work - Reducer 2 + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -164,7 +164,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: src @@ -189,10 +189,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: src @@ -215,7 +215,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2 input vertices: - 0 Map 3 + 0 Map 1 Statistics: Num rows: 19 Data size: 210 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((((_col0 > '20') and ((_col1 < 'val_50') or (_col0 > '2'))) and ((_col2 > '50') or (_col0 < '50'))) and (_col2 <> '4')) (type: boolean) @@ -238,7 +238,7 @@ STAGE PLANS: value expressions: _col1 (type: bigint) Local Work: Map Reduce Local Work - Reducer 2 + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/spark/ppd_join.q.out b/ql/src/test/results/clientpositive/spark/ppd_join.q.out index 1c5ba04..02b7d19 100644 --- a/ql/src/test/results/clientpositive/spark/ppd_join.q.out +++ b/ql/src/test/results/clientpositive/spark/ppd_join.q.out @@ -30,7 +30,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src @@ -59,7 +59,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: src @@ -85,7 +85,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 input vertices: - 0 Map 2 + 0 Map 1 Statistics: Num rows: 6 Data size: 69 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((((_col0 > '20') and ((_col1 < 'val_50') or (_col0 > '2'))) and ((_col2 > '50') or (_col0 < '50'))) and (_col2 <> '4')) (type: boolean) @@ -576,7 +576,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src @@ -602,7 +602,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: src @@ -625,7 +625,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 input vertices: - 0 Map 2 + 0 Map 1 Statistics: Num rows: 19 Data size: 210 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((((_col0 > '20') and ((_col1 < 'val_50') or (_col0 > '2'))) and ((_col2 > '50') or (_col0 < '50'))) and (_col2 <> '4')) (type: boolean) diff --git a/ql/src/test/results/clientpositive/spark/ppd_join2.q.out b/ql/src/test/results/clientpositive/spark/ppd_join2.q.out index 5b9a1f4..a2925c7 100644 --- a/ql/src/test/results/clientpositive/spark/ppd_join2.q.out +++ b/ql/src/test/results/clientpositive/spark/ppd_join2.q.out @@ -37,7 +37,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src @@ -66,7 +66,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: src @@ -92,7 +92,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 input vertices: - 0 Map 2 + 0 Map 1 Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col1 is not null (type: boolean) @@ -137,7 +137,7 @@ 
STAGE PLANS: 1 _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 input vertices: - 0 Map 1 + 0 Map 2 Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (((((_col0 <> '311') and ((_col1 <> 'val_50') or (_col0 > '1'))) and ((_col2 <> '10') or (_col0 <> '10'))) and (_col2 <> '14')) and (sqrt(_col4) <> 13)) (type: boolean) @@ -1741,7 +1741,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src @@ -1767,7 +1767,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: src @@ -1790,7 +1790,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 input vertices: - 0 Map 2 + 0 Map 1 Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: @@ -1829,7 +1829,7 @@ STAGE PLANS: 1 _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 input vertices: - 0 Map 1 + 0 Map 2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (((((_col0 <> '311') and ((_col1 <> 'val_50') or (_col0 > '1'))) and ((_col2 <> '10') or (_col0 <> '10'))) and (_col2 <> '14')) and (sqrt(_col4) <> 13)) (type: boolean) diff --git a/ql/src/test/results/clientpositive/spark/ppd_join3.q.out b/ql/src/test/results/clientpositive/spark/ppd_join3.q.out index 39f7655..75d576d 100644 --- a/ql/src/test/results/clientpositive/spark/ppd_join3.q.out +++ b/ql/src/test/results/clientpositive/spark/ppd_join3.q.out @@ -36,7 +36,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: src @@ -93,7 +93,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src @@ -122,7 +122,7 @@ STAGE PLANS: 2 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 input vertices: - 1 Map 1 + 1 Map 2 2 Map 3 Statistics: Num rows: 52 Data size: 558 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -1783,7 +1783,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: src @@ -1834,7 +1834,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src @@ -1860,7 +1860,7 @@ STAGE PLANS: 2 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 input vertices: - 1 Map 1 + 1 Map 2 2 Map 3 Statistics: Num rows: 160 Data size: 1705 Basic stats: COMPLETE Column stats: NONE Filter Operator diff --git a/ql/src/test/results/clientpositive/spark/ppd_join4.q.out b/ql/src/test/results/clientpositive/spark/ppd_join4.q.out index 71439cd..1cdc522 100644 --- a/ql/src/test/results/clientpositive/spark/ppd_join4.q.out +++ b/ql/src/test/results/clientpositive/spark/ppd_join4.q.out @@ -45,18 +45,36 @@ join test_tbl t3 on (t2.id=t3.id ) where t2.name='c' and t3.id='a' POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 + Stage-1 is a root stage + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: test_tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter 
Operator + predicate: ((id is not null and (name = 'c')) and (id = 'a')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 'a' (type: string) + 1 'a' (type: string) + Local Work: + Map Reduce Local Work + Map 2 + Map Operator Tree: + TableScan alias: t3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator @@ -72,56 +90,6 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-1 - Spark - Edges: - Reducer 3 <- Map 2 (SORT, 3) -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - alias: test_tbl - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Filter Operator - predicate: ((id is not null and (name = 'c')) and (id = 'a')) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: 'a' (type: string) - sort order: + - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reducer 3 - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - keys: - 0 'a' (type: string) - 1 'a' (type: string) - input vertices: - 1 Map 1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: 'a' (type: string), 'c' (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: -1 @@ -136,6 +104,7 @@ where t2.name='c' and t3.id='a' PREHOOK: type: QUERY PREHOOK: Input: default@test_tbl #### A masked pattern was here #### +Status: Failed POSTHOOK: query: select t2.* from (select id,name from (select id,name from test_tbl) t1 sort by id) t2 diff --git a/ql/src/test/results/clientpositive/spark/ppd_join5.q.out b/ql/src/test/results/clientpositive/spark/ppd_join5.q.out index bd5e24f..ce8cf45 100644 --- a/ql/src/test/results/clientpositive/spark/ppd_join5.q.out +++ b/ql/src/test/results/clientpositive/spark/ppd_join5.q.out @@ -52,7 +52,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -69,7 +69,7 @@ STAGE PLANS: 1 id (type: string), id (type: string) Local Work: Map Reduce Local Work - Map 2 + Map 3 Map Operator Tree: TableScan alias: c @@ -91,7 +91,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: a @@ -110,7 +110,7 @@ STAGE PLANS: 1 id (type: string), id (type: string) outputColumnNames: _col0, _col1, _col6 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Map Join Operator condition map: @@ -123,7 +123,7 @@ STAGE PLANS: 1 outputColumnNames: _col0, 
_col1, _col6, _col11 input vertices: - 1 Map 2 + 1 Map 3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col6 (type: int), _col11 (type: int) diff --git a/ql/src/test/results/clientpositive/spark/ppd_join_filter.q.out b/ql/src/test/results/clientpositive/spark/ppd_join_filter.q.out index 29acb9b..04b2548 100644 --- a/ql/src/test/results/clientpositive/spark/ppd_join_filter.q.out +++ b/ql/src/test/results/clientpositive/spark/ppd_join_filter.q.out @@ -128,10 +128,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src @@ -141,24 +141,20 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Group By Operator + aggregations: min(key) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(key) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string) - auto parallelism: false + tag: -1 + value expressions: _col1 (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -208,7 +204,7 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src] - Reducer 3 + Reducer 2 Local Work: Map Reduce Local Work Needs Tagging: false @@ -240,7 +236,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: a @@ -261,7 +257,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col8, _col9 input vertices: - 1 Reducer 3 + 1 Reducer 2 Position of Big Table: 0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -511,10 +507,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 3) + Reducer 2 <- Map 1 (GROUP, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src @@ -524,24 +520,20 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Group By Operator + aggregations: min(key) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(key) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num 
rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string) - auto parallelism: false + tag: -1 + value expressions: _col1 (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -591,7 +583,7 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src] - Reducer 3 + Reducer 2 Local Work: Map Reduce Local Work Needs Tagging: false @@ -623,7 +615,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: a @@ -644,7 +636,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col8, _col9 input vertices: - 1 Reducer 3 + 1 Reducer 2 Position of Big Table: 0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -894,10 +886,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 3) + Reducer 2 <- Map 1 (GROUP, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src @@ -970,7 +962,7 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src] - Reducer 3 + Reducer 2 Local Work: Map Reduce Local Work Needs Tagging: false @@ -1002,7 +994,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: a @@ -1023,7 +1015,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col8, _col9 input vertices: - 1 Reducer 3 + 1 Reducer 2 Position of Big Table: 0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -1273,10 +1265,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 3) + Reducer 2 <- Map 1 (GROUP, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src @@ -1286,24 +1278,20 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Group By Operator + aggregations: min(key) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(key) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string) - auto parallelism: false + tag: -1 + value expressions: _col1 (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1353,7 +1341,7 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src] - Reducer 3 + Reducer 2 Local Work: Map Reduce Local Work Needs Tagging: false @@ -1385,7 +1373,7 @@ STAGE 
PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: a @@ -1406,7 +1394,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col8, _col9 input vertices: - 1 Reducer 3 + 1 Reducer 2 Position of Big Table: 0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator diff --git a/ql/src/test/results/clientpositive/spark/ppd_multi_insert.q.out b/ql/src/test/results/clientpositive/spark/ppd_multi_insert.q.out index 5b07964..afb470e 100644 --- a/ql/src/test/results/clientpositive/spark/ppd_multi_insert.q.out +++ b/ql/src/test/results/clientpositive/spark/ppd_multi_insert.q.out @@ -56,7 +56,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -78,7 +78,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -97,7 +97,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col1 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col0 < 100) (type: boolean) @@ -1320,7 +1320,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -1342,7 +1342,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -1361,7 +1361,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col1 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col0 < 100) (type: boolean) diff --git a/ql/src/test/results/clientpositive/spark/ppd_outer_join1.q.out b/ql/src/test/results/clientpositive/spark/ppd_outer_join1.q.out index 6b5f8c6..124510c 100644 --- a/ql/src/test/results/clientpositive/spark/ppd_outer_join1.q.out +++ b/ql/src/test/results/clientpositive/spark/ppd_outer_join1.q.out @@ -30,7 +30,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -52,7 +52,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -71,7 +71,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((((_col0 > 10) and (_col0 < 20)) and (_col5 > 15)) and (_col5 < 25)) (type: boolean) @@ -150,7 +150,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -172,7 +172,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -191,7 +191,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((_col5 > 15) and (_col5 < 25)) (type: boolean) diff --git a/ql/src/test/results/clientpositive/spark/ppd_outer_join2.q.out b/ql/src/test/results/clientpositive/spark/ppd_outer_join2.q.out index 9d9c89c..375ba82 100644 --- a/ql/src/test/results/clientpositive/spark/ppd_outer_join2.q.out +++ 
b/ql/src/test/results/clientpositive/spark/ppd_outer_join2.q.out @@ -30,7 +30,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -52,7 +52,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -71,7 +71,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 0 Map 2 + 0 Map 1 Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((((_col0 > '10') and (_col0 < '20')) and (_col5 > '15')) and (_col5 < '25')) (type: boolean) @@ -270,7 +270,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -292,7 +292,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -311,7 +311,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 0 Map 2 + 0 Map 1 Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((_col0 > '10') and (_col0 < '20')) (type: boolean) diff --git a/ql/src/test/results/clientpositive/spark/ppd_outer_join3.q.out b/ql/src/test/results/clientpositive/spark/ppd_outer_join3.q.out index bbd671b..f15c29a 100644 --- a/ql/src/test/results/clientpositive/spark/ppd_outer_join3.q.out +++ b/ql/src/test/results/clientpositive/spark/ppd_outer_join3.q.out @@ -34,7 +34,7 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -45,7 +45,7 @@ STAGE PLANS: Map 3 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -262,7 +262,7 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -273,7 +273,7 @@ STAGE PLANS: Map 3 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) diff --git a/ql/src/test/results/clientpositive/spark/ppd_outer_join4.q.out b/ql/src/test/results/clientpositive/spark/ppd_outer_join4.q.out index 0481992..11c7c14 100644 --- a/ql/src/test/results/clientpositive/spark/ppd_outer_join4.q.out +++ b/ql/src/test/results/clientpositive/spark/ppd_outer_join4.q.out @@ -39,15 +39,15 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (sqrt(key) <> 13) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {key} {value} - 1 {value} + 0 {value} + 1 {key} {value} 2 {key} keys: 0 key (type: string) @@ -55,18 +55,18 @@ STAGE PLANS: 2 key (type: string) Local Work: Map Reduce Local Work - Map 3 + Map 2 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (sqrt(key) <> 13) (type: boolean) 
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {value} - 1 {key} {value} + 0 {key} {value} + 1 {value} 2 {key} keys: 0 key (type: string) @@ -79,7 +79,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: c @@ -101,8 +101,8 @@ STAGE PLANS: 2 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10 input vertices: - 0 Map 3 - 1 Map 1 + 0 Map 1 + 1 Map 2 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (((((_col0 > '10') and (_col0 < '20')) and (_col5 > '15')) and (_col5 < '25')) and (sqrt(_col10) <> 13)) (type: boolean) @@ -420,15 +420,15 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (sqrt(key) <> 13) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {key} {value} - 1 {value} + 0 {value} + 1 {key} {value} 2 {key} keys: 0 key (type: string) @@ -436,18 +436,18 @@ STAGE PLANS: 2 key (type: string) Local Work: Map Reduce Local Work - Map 3 + Map 2 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (sqrt(key) <> 13) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {value} - 1 {key} {value} + 0 {key} {value} + 1 {value} 2 {key} keys: 0 key (type: string) @@ -460,7 +460,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: c @@ -482,8 +482,8 @@ STAGE PLANS: 2 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10 input vertices: - 0 Map 3 - 1 Map 1 + 0 Map 1 + 1 Map 2 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((((_col5 > '15') and (_col5 < '25')) and (_col0 > '10')) and (_col0 < '20')) (type: boolean) diff --git a/ql/src/test/results/clientpositive/spark/ppd_outer_join5.q.out b/ql/src/test/results/clientpositive/spark/ppd_outer_join5.q.out index b817168..537497c 100644 --- a/ql/src/test/results/clientpositive/spark/ppd_outer_join5.q.out +++ b/ql/src/test/results/clientpositive/spark/ppd_outer_join5.q.out @@ -48,14 +48,15 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: t3 + alias: t1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator predicate: (id = 20) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: 20 (type: int) + key expressions: id (type: int) sort order: + + Map-reduce partition columns: id (type: int) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: key (type: string), value (type: string) Map 3 @@ -75,15 +76,14 @@ STAGE PLANS: Map 4 Map Operator Tree: TableScan - alias: t1 + alias: t3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator predicate: (id = 20) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: id (type: int) + key expressions: 20 (type: int) 
sort order: + - Map-reduce partition columns: id (type: int) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: key (type: string), value (type: string) Reducer 2 @@ -130,10 +130,10 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: t3 + alias: t2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator predicate: (id = 20) (type: boolean) @@ -142,17 +142,17 @@ STAGE PLANS: condition expressions: 0 {key} {value} 1 {key} {value} - 2 {key} {value} + 2 {id} {key} {value} keys: 0 20 (type: int) 1 20 (type: int) 2 id (type: int) Local Work: Map Reduce Local Work - Map 2 + Map 3 Map Operator Tree: TableScan - alias: t2 + alias: t3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator predicate: (id = 20) (type: boolean) @@ -161,7 +161,7 @@ STAGE PLANS: condition expressions: 0 {key} {value} 1 {key} {value} - 2 {id} {key} {value} + 2 {key} {value} keys: 0 20 (type: int) 1 20 (type: int) @@ -173,7 +173,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: t1 @@ -196,7 +196,7 @@ STAGE PLANS: outputColumnNames: _col1, _col2, _col7, _col8, _col12, _col13, _col14 input vertices: 1 Map 2 - 2 Map 1 + 2 Map 3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: 20 (type: int), _col1 (type: string), _col2 (type: string), 20 (type: int), _col7 (type: string), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string) @@ -232,10 +232,10 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: t3 + alias: t2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator predicate: (id = 20) (type: boolean) @@ -244,17 +244,17 @@ STAGE PLANS: condition expressions: 0 {key} {value} 1 {key} {value} - 2 {key} {value} + 2 {id} {key} {value} keys: 0 20 (type: int) 1 20 (type: int) 2 id (type: int) Local Work: Map Reduce Local Work - Map 2 + Map 3 Map Operator Tree: TableScan - alias: t2 + alias: t3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator predicate: (id = 20) (type: boolean) @@ -263,7 +263,7 @@ STAGE PLANS: condition expressions: 0 {key} {value} 1 {key} {value} - 2 {id} {key} {value} + 2 {key} {value} keys: 0 20 (type: int) 1 20 (type: int) @@ -275,7 +275,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: t1 @@ -298,7 +298,7 @@ STAGE PLANS: outputColumnNames: _col1, _col2, _col7, _col8, _col12, _col13, _col14 input vertices: 1 Map 2 - 2 Map 1 + 2 Map 3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: 20 (type: int), _col1 (type: string), _col2 (type: string), 20 (type: int), _col7 (type: string), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string) diff --git a/ql/src/test/results/clientpositive/spark/ppd_transform.q.out b/ql/src/test/results/clientpositive/spark/ppd_transform.q.out index 6ee9f72..2b75c2f 100644 --- a/ql/src/test/results/clientpositive/spark/ppd_transform.q.out +++ b/ql/src/test/results/clientpositive/spark/ppd_transform.q.out @@ -58,17 +58,13 @@ STAGE PLANS: Filter Operator predicate: (_col0 < 100) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE 
Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -403,31 +399,23 @@ STAGE PLANS: Filter Operator predicate: ((_col0 = 'a') or (_col0 = 'b')) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Filter Operator predicate: ((_col0 = 'c') or (_col0 = 'd')) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator diff --git a/ql/src/test/results/clientpositive/spark/reduce_deduplicate_exclude_join.q.out b/ql/src/test/results/clientpositive/spark/reduce_deduplicate_exclude_join.q.out index a489a42..45464f7 100644 --- a/ql/src/test/results/clientpositive/spark/reduce_deduplicate_exclude_join.q.out +++ b/ql/src/test/results/clientpositive/spark/reduce_deduplicate_exclude_join.q.out @@ -12,7 +12,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: b @@ -33,10 +33,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (SORT, 1) + Reducer 2 <- Map 1 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src @@ -54,7 +54,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) 
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Reducer 3 + Reducer 2 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -73,7 +73,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col1, _col2, _col3 input vertices: - 1 Map 1 + 1 Map 3 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) diff --git a/ql/src/test/results/clientpositive/spark/router_join_ppr.q.out b/ql/src/test/results/clientpositive/spark/router_join_ppr.q.out index ea0c948..7a1462c 100644 --- a/ql/src/test/results/clientpositive/spark/router_join_ppr.q.out +++ b/ql/src/test/results/clientpositive/spark/router_join_ppr.q.out @@ -114,7 +114,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -193,7 +193,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -219,7 +219,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 0 Map 2 + 0 Map 1 Position of Big Table: 1 Statistics: Num rows: 244 Data size: 2593 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -606,7 +606,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -729,7 +729,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -750,7 +750,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col1, _col7, _col8 input vertices: - 0 Map 2 + 0 Map 1 Position of Big Table: 1 Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -990,7 +990,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -1064,7 +1064,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -1085,7 +1085,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 0 Map 2 + 0 Map 1 Position of Big Table: 1 Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -1374,7 +1374,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -1591,7 +1591,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -1612,7 +1612,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col1, _col2, _col7, _col8 input vertices: - 0 Map 2 + 0 Map 1 Position of Big Table: 1 Statistics: Num rows: 244 Data size: 2593 Basic stats: COMPLETE Column stats: NONE Filter Operator diff --git a/ql/src/test/results/clientpositive/spark/sample10.q.out b/ql/src/test/results/clientpositive/spark/sample10.q.out index 33cfe84..73beab2 100644 --- a/ql/src/test/results/clientpositive/spark/sample10.q.out +++ b/ql/src/test/results/clientpositive/spark/sample10.q.out @@ -329,31 +329,27 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 60 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 
(type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 60 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 10 Data size: 60 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: bigint) - auto parallelism: false + Statistics: Num rows: 20 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 20 Data size: 120 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: bigint) + auto parallelism: false Reducer 3 Needs Tagging: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 120 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 10 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 120 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git a/ql/src/test/results/clientpositive/spark/sample8.q.out b/ql/src/test/results/clientpositive/spark/sample8.q.out index f3a66d4..f211e70 100644 --- a/ql/src/test/results/clientpositive/spark/sample8.q.out +++ b/ql/src/test/results/clientpositive/spark/sample8.q.out @@ -94,7 +94,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: s @@ -170,7 +170,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: t @@ -191,7 +191,7 @@ STAGE PLANS: 1 key (type: string), value (type: string) outputColumnNames: _col0, _col1, _col7, _col8 input vertices: - 0 Map 2 + 0 Map 1 Position of Big Table: 1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -814,7 +814,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -837,7 +837,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -857,7 +857,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) @@ -911,7 +911,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -934,7 +934,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -954,7 +954,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col0 = _col5) (type: boolean) diff --git a/ql/src/test/results/clientpositive/spark/script_pipe.q.out b/ql/src/test/results/clientpositive/spark/script_pipe.q.out 
index 5b966ff..083a114 100644 --- a/ql/src/test/results/clientpositive/spark/script_pipe.q.out +++ b/ql/src/test/results/clientpositive/spark/script_pipe.q.out @@ -40,24 +40,20 @@ STAGE PLANS: Limit Number of rows: 1 Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + Transform Operator + command: true + output info: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - Transform Operator - command: true - output info: + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/semijoin.q.out b/ql/src/test/results/clientpositive/spark/semijoin.q.out index d90d50d..4cc5213 100644 --- a/ql/src/test/results/clientpositive/spark/semijoin.q.out +++ b/ql/src/test/results/clientpositive/spark/semijoin.q.out @@ -128,7 +128,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: b @@ -158,10 +158,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (SORT, 3) + Reducer 2 <- Map 1 (SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -180,19 +180,15 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col1 input vertices: - 1 Map 1 + 1 Map 3 Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Reducer 3 + Reducer 2 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) @@ -242,7 +238,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: b @@ -272,10 +268,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (SORT, 3) + Reducer 2 <- Map 1 (SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -294,19 +290,15 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col1 input vertices: - 1 Map 1 + 1 Map 3 Statistics: Num rows: 6 Data size: 49 
Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ Statistics: Num rows: 6 Data size: 49 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 6 Data size: 49 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Reducer 3 + Reducer 2 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) @@ -358,7 +350,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: b @@ -388,10 +380,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (SORT, 3) + Reducer 2 <- Map 1 (SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -410,19 +402,15 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col1 input vertices: - 1 Map 1 + 1 Map 3 Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Reducer 3 + Reducer 2 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) @@ -466,7 +454,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: b @@ -474,32 +462,28 @@ STAGE PLANS: Filter Operator predicate: (key < 15) (type: boolean) Statistics: Num rows: 7 Data size: 51 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: key + Group By Operator + keys: key (type: int), key (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 7 Data size: 51 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: key (type: int), key (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 51 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 - keys: - 0 key (type: int) - 1 _col1 (type: int) + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 + keys: + 0 key (type: int) + 1 _col1 (type: int) Local Work: Map Reduce Local Work Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (SORT, 3) + Reducer 2 <- Map 1 (SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -518,7 +502,7 @@ STAGE PLANS: 1 _col1 (type: int) outputColumnNames: _col1 input vertices: - 1 Map 1 + 1 Map 3 Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string) @@ -530,7 +514,7 @@ STAGE PLANS: Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Reducer 3 + Reducer 2 Reduce Operator 
Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) @@ -585,7 +569,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: b @@ -615,10 +599,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (SORT, 3) + Reducer 2 <- Map 1 (SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -637,19 +621,15 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col1 input vertices: - 1 Map 1 + 1 Map 3 Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Reducer 3 + Reducer 2 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) @@ -696,7 +676,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: t3 @@ -726,10 +706,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (SORT, 3) + Reducer 3 <- Map 2 (SORT, 3) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: a @@ -748,7 +728,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1 input vertices: - 1 Map 3 + 1 Map 1 Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string) @@ -760,7 +740,7 @@ STAGE PLANS: Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Reducer 2 + Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) @@ -969,14 +949,10 @@ STAGE PLANS: input vertices: 1 Map 1 Statistics: Num rows: 6 Data size: 49 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ Statistics: Num rows: 6 Data size: 49 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 6 Data size: 49 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work Reducer 3 @@ -1028,7 +1004,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: b @@ -1058,10 +1034,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (SORT, 3) + Reducer 2 <- Map 1 (SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -1080,19 +1056,15 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0 input vertices: - 1 Map 1 + 1 Map 3 Statistics: Num rows: 12 Data size: 89 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + Statistics: Num rows: 12 Data size: 89 
Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 12 Data size: 89 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Reducer 3 + Reducer 2 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -1155,7 +1127,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: b @@ -1185,10 +1157,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (SORT, 3) + Reducer 2 <- Map 1 (SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -1207,19 +1179,15 @@ STAGE PLANS: 1 (2 * _col0) (type: int) outputColumnNames: _col0, _col1 input vertices: - 1 Map 1 + 1 Map 3 Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Reducer 3 + Reducer 2 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) @@ -1267,7 +1235,26 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {value} + 1 {key} {value} + 2 + keys: + 0 key (type: int) + 1 key (type: int) + 2 _col0 (type: int) + Local Work: + Map Reduce Local Work + Map 4 Map Operator Tree: TableScan alias: c @@ -1295,33 +1282,14 @@ STAGE PLANS: 2 _col0 (type: int) Local Work: Map Reduce Local Work - Map 4 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {key} {value} - 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - Local Work: - Map Reduce Local Work Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (SORT, 3) + Reducer 3 <- Map 2 (SORT, 3) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -1343,8 +1311,8 @@ STAGE PLANS: 2 _col0 (type: int) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 0 Map 4 - 2 Map 3 + 0 Map 1 + 2 Map 4 Statistics: Num rows: 24 Data size: 178 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) @@ -1357,7 +1325,7 @@ STAGE PLANS: value expressions: _col2 (type: int), _col3 (type: string) Local Work: Map Reduce Local Work - Reducer 2 + Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: 
string), VALUE._col0 (type: int), VALUE._col1 (type: string) @@ -1417,7 +1385,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: b @@ -1447,10 +1415,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (SORT, 3) + Reducer 2 <- Map 1 (SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -1469,19 +1437,15 @@ STAGE PLANS: 1 _col0 (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 input vertices: - 1 Map 1 + 1 Map 3 Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Reducer 3 + Reducer 2 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) @@ -1539,7 +1503,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: b @@ -1567,7 +1531,7 @@ STAGE PLANS: 2 _col0 (type: int) Local Work: Map Reduce Local Work - Map 2 + Map 4 Map Operator Tree: TableScan alias: c @@ -1599,10 +1563,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 4 <- Map 3 (SORT, 3) + Reducer 2 <- Map 1 (SORT, 3) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: a @@ -1624,20 +1588,16 @@ STAGE PLANS: 2 _col0 (type: int) outputColumnNames: _col0 input vertices: - 1 Map 1 - 2 Map 2 + 1 Map 3 + 2 Map 4 Statistics: Num rows: 24 Data size: 178 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + Statistics: Num rows: 24 Data size: 178 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 24 Data size: 178 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Reducer 4 + Reducer 2 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -1697,7 +1657,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: b @@ -1713,7 +1673,7 @@ STAGE PLANS: 2 _col0 (type: int) Local Work: Map Reduce Local Work - Map 2 + Map 4 Map Operator Tree: TableScan alias: c @@ -1742,10 +1702,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 4 <- Map 3 (SORT, 3) + Reducer 2 <- Map 1 (SORT, 3) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: a @@ -1764,20 +1724,16 @@ STAGE PLANS: 2 _col0 (type: int) outputColumnNames: _col0 input vertices: - 1 Map 1 - 2 Map 2 + 1 Map 3 + 2 Map 4 Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE - Reduce Output 
Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Reducer 4 + Reducer 2 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -1849,7 +1805,23 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + 2 + keys: + 0 key (type: int) + 1 key (type: int) + 2 _col0 (type: int) + Local Work: + Map Reduce Local Work + Map 4 Map Operator Tree: TableScan alias: c @@ -1874,30 +1846,14 @@ STAGE PLANS: 2 _col0 (type: int) Local Work: Map Reduce Local Work - Map 4 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - Local Work: - Map Reduce Local Work Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (SORT, 3) + Reducer 3 <- Map 2 (SORT, 3) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -1916,20 +1872,16 @@ STAGE PLANS: 2 _col0 (type: int) outputColumnNames: _col0 input vertices: - 0 Map 4 - 2 Map 3 + 0 Map 1 + 2 Map 4 Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Reducer 2 + Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -2009,6 +1961,16 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan alias: b Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -2016,7 +1978,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: int) Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE - Map 4 + Map 5 Map Operator Tree: TableScan alias: c @@ -2035,16 +1997,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Map 5 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -2057,14 +2009,10 @@ STAGE PLANS: 2 outputColumnNames: _col0 Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE - 
Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: Select Operator @@ -2140,7 +2088,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: b @@ -2165,7 +2113,7 @@ STAGE PLANS: 2 key (type: int) Local Work: Map Reduce Local Work - Map 2 + Map 4 Map Operator Tree: TableScan alias: c @@ -2185,10 +2133,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 4 <- Map 3 (SORT, 3) + Reducer 2 <- Map 1 (SORT, 3) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: a @@ -2207,20 +2155,16 @@ STAGE PLANS: 2 key (type: int) outputColumnNames: _col0 input vertices: - 1 Map 1 - 2 Map 2 + 1 Map 3 + 2 Map 4 Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Reducer 4 + Reducer 2 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -2298,6 +2242,22 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + 2 + keys: + 0 key (type: int) + 1 _col0 (type: int) + 2 key (type: int) + Local Work: + Map Reduce Local Work + Map 2 + Map Operator Tree: + TableScan alias: b Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -2320,30 +2280,14 @@ STAGE PLANS: 2 key (type: int) Local Work: Map Reduce Local Work - Map 4 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - Local Work: - Map Reduce Local Work Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (SORT, 3) + Reducer 4 <- Map 3 (SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: c @@ -2362,20 +2306,16 @@ STAGE PLANS: 2 key (type: int) outputColumnNames: _col0 input vertices: - 0 Map 4 - 1 Map 1 + 0 Map 1 + 1 Map 2 Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Reducer 3 + Reducer 4 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -2457,6 
+2397,16 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan alias: b Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -2473,7 +2423,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - Map 4 + Map 5 Map Operator Tree: TableScan alias: c @@ -2483,16 +2433,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: int) Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Map 5 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -2505,14 +2445,10 @@ STAGE PLANS: 2 outputColumnNames: _col0 Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: Select Operator @@ -2601,7 +2537,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: b @@ -2627,7 +2563,7 @@ STAGE PLANS: 1 _col0 (type: int) Local Work: Map Reduce Local Work - Map 2 + Map 4 Map Operator Tree: TableScan alias: c @@ -2645,10 +2581,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 4 <- Map 3 (SORT, 3) + Reducer 2 <- Map 1 (SORT, 3) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: a @@ -2667,7 +2603,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col1 input vertices: - 1 Map 1 + 1 Map 3 Statistics: Num rows: 12 Data size: 89 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -2680,19 +2616,15 @@ STAGE PLANS: 1 value (type: string) outputColumnNames: _col0 input vertices: - 1 Map 2 + 1 Map 4 Statistics: Num rows: 13 Data size: 97 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + Statistics: Num rows: 13 Data size: 97 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 13 Data size: 97 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Reducer 4 + Reducer 2 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -2773,7 +2705,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -2804,7 +2736,7 @@ STAGE PLANS: 
Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -2823,7 +2755,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 6 Data size: 49 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int) diff --git a/ql/src/test/results/clientpositive/spark/skewjoin.q.out b/ql/src/test/results/clientpositive/spark/skewjoin.q.out index 6437a0a..58b91b8 100644 --- a/ql/src/test/results/clientpositive/spark/skewjoin.q.out +++ b/ql/src/test/results/clientpositive/spark/skewjoin.q.out @@ -89,7 +89,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: src2 @@ -111,7 +111,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src1 @@ -130,7 +130,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col6 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), _col6 (type: string) @@ -203,10 +203,10 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: d + alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -214,9 +214,9 @@ STAGE PLANS: Spark HashTable Sink Operator condition expressions: 0 {key} {val} - 1 {key} {val} + 1 {val} 2 {key} {val} - 3 {val} + 3 {key} {val} keys: 0 key (type: string) 1 key (type: string) @@ -224,19 +224,19 @@ STAGE PLANS: 3 key (type: string) Local Work: Map Reduce Local Work - Map 2 + Map 3 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: c + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: 0 {key} {val} - 1 {val} - 2 {key} {val} + 1 {key} {val} + 2 {val} 3 {key} {val} keys: 0 key (type: string) @@ -245,11 +245,11 @@ STAGE PLANS: 3 key (type: string) Local Work: Map Reduce Local Work - Map 3 + Map 4 Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + alias: d + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE @@ -257,8 +257,8 @@ STAGE PLANS: condition expressions: 0 {key} {val} 1 {key} {val} - 2 {val} - 3 {key} {val} + 2 {key} {val} + 3 {val} keys: 0 key (type: string) 1 key (type: string) @@ -271,7 +271,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 4 + Map 1 Map Operator Tree: TableScan alias: a @@ -298,7 +298,7 @@ STAGE PLANS: input vertices: 1 Map 2 2 Map 3 - 3 Map 1 + 3 Map 4 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) @@ -363,10 +363,10 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + 
Map 2 Map Operator Tree: TableScan - alias: d + alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -374,9 +374,9 @@ STAGE PLANS: Spark HashTable Sink Operator condition expressions: 0 {key} {val} - 1 {key} {val} + 1 {val} 2 {key} {val} - 3 {val} + 3 {key} {val} keys: 0 key (type: string) 1 key (type: string) @@ -384,19 +384,19 @@ STAGE PLANS: 3 key (type: string) Local Work: Map Reduce Local Work - Map 2 + Map 3 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: c + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: 0 {key} {val} - 1 {val} - 2 {key} {val} + 1 {key} {val} + 2 {val} 3 {key} {val} keys: 0 key (type: string) @@ -405,11 +405,11 @@ STAGE PLANS: 3 key (type: string) Local Work: Map Reduce Local Work - Map 3 + Map 4 Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + alias: d + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE @@ -417,8 +417,8 @@ STAGE PLANS: condition expressions: 0 {key} {val} 1 {key} {val} - 2 {val} - 3 {key} {val} + 2 {key} {val} + 3 {val} keys: 0 key (type: string) 1 key (type: string) @@ -431,7 +431,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 4 + Map 1 Map Operator Tree: TableScan alias: a @@ -458,7 +458,7 @@ STAGE PLANS: input vertices: 1 Map 2 2 Map 3 - 3 Map 1 + 3 Map 4 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) @@ -515,7 +515,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: a @@ -536,10 +536,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: c @@ -558,24 +558,20 @@ STAGE PLANS: 1 (key + 1) (type: double) outputColumnNames: _col0, _col1, _col5 input vertices: - 0 Map 3 + 0 Map 1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) - outputColumnNames: _col0, _col1, _col5 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col5)) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col5)) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE 
Column stats: NONE
- value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint)
+ value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint)
Local Work:
Map Reduce Local Work
- Reducer 2
+ Reducer 3
Reduce Operator Tree:
Group By Operator
aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2)
@@ -688,19 +684,15 @@ STAGE PLANS:
input vertices:
1 Map 3
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col2 (type: string), _col3 (type: string)
- outputColumnNames: _col2, _col3
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: sum(hash(_col2)), sum(hash(_col3))
- mode: hash
- outputColumnNames: _col0, _col1
+ Group By Operator
+ aggregations: sum(hash(_col2)), sum(hash(_col3))
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint), _col1 (type: bigint)
+ value expressions: _col0 (type: bigint), _col1 (type: bigint)
Local Work:
Map Reduce Local Work
Reducer 2
@@ -824,19 +816,15 @@ STAGE PLANS:
input vertices:
1 Map 3
Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col2 (type: string), _col3 (type: string)
- outputColumnNames: _col2, _col3
- Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: sum(hash(_col2)), sum(hash(_col3))
- mode: hash
- outputColumnNames: _col0, _col1
+ Group By Operator
+ aggregations: sum(hash(_col2)), sum(hash(_col3))
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint), _col1 (type: bigint)
+ value expressions: _col0 (type: bigint), _col1 (type: bigint)
Local Work:
Map Reduce Local Work
Reducer 2
@@ -915,7 +903,7 @@ STAGE PLANS:
Spark
#### A masked pattern was here ####
Vertices:
- Map 1
+ Map 3
Map Operator Tree:
TableScan
alias: src
@@ -965,10 +953,10 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 3 <- Map 2 (GROUP, 1)
+ Reducer 2 <- Map 1 (GROUP, 1)
#### A masked pattern was here ####
Vertices:
- Map 2
+ Map 1
Map Operator Tree:
TableScan
alias: src
@@ -994,25 +982,21 @@ STAGE PLANS:
2 _col0 (type: string)
outputColumnNames: _col0, _col3
input vertices:
- 1 Map 1
+ 1 Map 3
2 Map 4
Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col3 (type: string)
- outputColumnNames: _col0, _col3
- Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: sum(hash(_col0)), sum(hash(_col3))
- mode: hash
- outputColumnNames: _col0, _col1
+ Group By Operator
+ aggregations: sum(hash(_col0)), sum(hash(_col3))
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint), _col1 (type: bigint)
+ value expressions: _col0 (type: bigint), _col1 (type: bigint)
Local Work:
Map Reduce Local Work
- Reducer 3
+ Reducer 2
Reduce Operator Tree:
Group By Operator
aggregations: sum(VALUE._col0), sum(VALUE._col1)
@@ -1078,7 +1062,7 @@ STAGE PLANS:
Spark
#### A masked pattern was here ####
Vertices:
- Map 1
+ Map 3
Map Operator Tree:
TableScan
alias: v
@@ -1096,10 +1080,10 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 3 <- Map 2 (GROUP, 1)
+ Reducer 2 <- Map 1 (GROUP, 1)
#### A masked pattern was here ####
Vertices:
- Map 2
+ Map 1
Map Operator Tree:
TableScan
alias: k
@@ -1115,24 +1099,20 @@ STAGE PLANS:
1 UDFToDouble(key) (type: double)
outputColumnNames: _col0, _col6
input vertices:
- 1 Map 1
+ 1 Map 3
Statistics: Num rows: 0 Data size: 33 Basic stats: PARTIAL Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col6 (type: string)
- outputColumnNames: _col0, _col6
- Statistics: Num rows: 0 Data size: 33 Basic stats: PARTIAL Column stats: NONE
- Group By Operator
- aggregations: sum(hash(_col0)), sum(hash(_col6))
- mode: hash
- outputColumnNames: _col0, _col1
+ Group By Operator
+ aggregations: sum(hash(_col0)), sum(hash(_col6))
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint), _col1 (type: bigint)
+ value expressions: _col0 (type: bigint), _col1 (type: bigint)
Local Work:
Map Reduce Local Work
- Reducer 3
+ Reducer 2
Reduce Operator Tree:
Group By Operator
aggregations: sum(VALUE._col0), sum(VALUE._col1)
diff --git a/ql/src/test/results/clientpositive/spark/skewjoin_noskew.q.out b/ql/src/test/results/clientpositive/spark/skewjoin_noskew.q.out
index e3c42d8..1e5a71c 100644
--- a/ql/src/test/results/clientpositive/spark/skewjoin_noskew.q.out
+++ b/ql/src/test/results/clientpositive/spark/skewjoin_noskew.q.out
@@ -16,7 +16,7 @@ STAGE PLANS:
Spark
#### A masked pattern was here ####
Vertices:
- Map 1
+ Map 3
Map Operator Tree:
TableScan
alias: b
@@ -37,10 +37,10 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 3 <- Map 2 (SORT, 1)
+ Reducer 2 <- Map 1 (SORT, 1)
#### A masked pattern was here ####
Vertices:
- Map 2
+ Map 1
Map Operator Tree:
TableScan
alias: a
@@ -59,20 +59,16 @@ STAGE PLANS:
1 key (type: string)
outputColumnNames: _col0, _col1
input vertices:
- 1 Map 1
+ 1 Map 3
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: string)
- outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: string)
+ value expressions: _col1 (type: string)
Local Work:
Map Reduce Local Work
- Reducer 3
+ Reducer 2
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
diff --git a/ql/src/test/results/clientpositive/spark/skewjoin_union_remove_1.q.out b/ql/src/test/results/clientpositive/spark/skewjoin_union_remove_1.q.out
index 013b299..db4fdc2 100644
--- a/ql/src/test/results/clientpositive/spark/skewjoin_union_remove_1.q.out
+++ b/ql/src/test/results/clientpositive/spark/skewjoin_union_remove_1.q.out
@@ -77,7 +77,7 @@ STAGE PLANS:
alias: b
Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
Filter Operator
- predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean)
+ predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Spark HashTable Sink Operator
condition expressions:
@@ -100,7 +100,7 @@ STAGE PLANS:
TableScan
alias: a
Filter Operator
- predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean)
+ predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean)
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -116,14 +116,12 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Select Operator
- SELECT * : (no compute)
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Local Work:
Map Reduce Local Work
Map 5
@@ -131,7 +129,7 @@ STAGE PLANS:
TableScan
alias: a
Filter Operator
- predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean)
+ predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean)
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -147,14 +145,12 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Select Operator
- SELECT * : (no compute)
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Local Work:
Map Reduce Local Work
Union 2
@@ -170,7 +166,7 @@ STAGE PLANS:
alias: b
Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
Filter Operator
- predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean)
+ predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Spark HashTable Sink Operator
condition expressions:
@@ -233,7 +229,7 @@ STAGE PLANS:
alias: a
Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
Filter Operator
- predicate: (not ((key = '2') or (key = '3'))) (type: boolean)
+ predicate: ((key = '2') or (key = '3')) (type: boolean)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Spark HashTable Sink Operator
condition expressions:
@@ -256,7 +252,7 @@ STAGE PLANS:
TableScan
alias: b
Filter Operator
- predicate: (not ((key = '2') or (key = '3'))) (type: boolean)
+ predicate: ((key = '2') or (key = '3')) (type: boolean)
Map Join Operator
condition map:
Right Outer Join0 to 1
@@ -272,14 +268,12 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Select Operator
- SELECT * : (no compute)
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Local Work:
Map Reduce Local Work
Map 4
@@ -287,7 +281,7 @@ STAGE PLANS:
TableScan
alias: b
Filter Operator
- predicate: ((key = '2') or (key = '3')) (type: boolean)
+ predicate: (not ((key = '2') or (key = '3'))) (type: boolean)
Map Join Operator
condition map:
Right Outer Join0 to 1
@@ -303,14 +297,12 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Select Operator
- SELECT * : (no compute)
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Local Work:
Map Reduce Local Work
Union 3
@@ -326,7 +318,7 @@ STAGE PLANS:
alias: a
Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
Filter Operator
- predicate: ((key = '2') or (key = '3')) (type: boolean)
+ predicate: (not ((key = '2') or (key = '3'))) (type: boolean)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Spark HashTable Sink Operator
condition expressions:
@@ -397,7 +389,7 @@ STAGE PLANS:
alias: b
Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
Filter Operator
- predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean)
+ predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Spark HashTable Sink Operator
condition expressions:
@@ -420,7 +412,7 @@ STAGE PLANS:
TableScan
alias: a
Filter Operator
- predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean)
+ predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean)
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -436,15 +428,13 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Select Operator
- SELECT * : (no compute)
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest1
Local Work:
Map Reduce Local Work
Map 5
@@ -452,7 +442,7 @@ STAGE PLANS:
TableScan
alias: a
Filter Operator
- predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean)
+ predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean)
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -468,15 +458,13 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Select Operator
- SELECT * : (no compute)
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest1
Local Work:
Map Reduce Local Work
Union 2
@@ -502,7 +490,7 @@ STAGE PLANS:
alias: b
Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
Filter Operator
- predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean)
+ predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Spark HashTable Sink Operator
condition expressions:
@@ -571,7 +559,7 @@ STAGE PLANS:
alias: a
Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
Filter Operator
- predicate: (not ((key = '2') or (key = '3'))) (type: boolean)
+ predicate: ((key = '2') or (key = '3')) (type: boolean)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Spark HashTable Sink Operator
condition expressions:
@@ -594,7 +582,7 @@ STAGE PLANS:
TableScan
alias: b
Filter Operator
- predicate: (not ((key = '2') or (key = '3'))) (type: boolean)
+ predicate: ((key = '2') or (key = '3')) (type: boolean)
Map Join Operator
condition map:
Right Outer Join0 to 1
@@ -610,15 +598,13 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Select Operator
- SELECT * : (no compute)
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest1
Local Work:
Map Reduce Local Work
Map 4
@@ -626,7 +612,7 @@ STAGE PLANS:
TableScan
alias: b
Filter Operator
- predicate: ((key = '2') or (key = '3')) (type: boolean)
+ predicate: (not ((key = '2') or (key = '3'))) (type: boolean)
Map Join Operator
condition map:
Right Outer Join0 to 1
@@ -642,15 +628,13 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Select Operator
- SELECT * : (no compute)
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest1
Local Work:
Map Reduce Local Work
Union 3
@@ -676,7 +660,7 @@ STAGE PLANS:
alias: a
Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
Filter Operator
- predicate: ((key = '2') or (key = '3')) (type: boolean)
+ predicate: (not ((key = '2') or (key = '3'))) (type: boolean)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Spark HashTable Sink Operator
condition expressions:
diff --git a/ql/src/test/results/clientpositive/spark/skewjoin_union_remove_2.q.out b/ql/src/test/results/clientpositive/spark/skewjoin_union_remove_2.q.out
index 7bb5267..72e6182 100644
--- a/ql/src/test/results/clientpositive/spark/skewjoin_union_remove_2.q.out
+++ b/ql/src/test/results/clientpositive/spark/skewjoin_union_remove_2.q.out
@@ -83,19 +83,19 @@ STAGE PLANS:
Spark
#### A masked pattern was here ####
Vertices:
- Map 1
+ Map 3
Map Operator Tree:
TableScan
- alias: c
- Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE
+ alias: b
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
Filter Operator
- predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean)
+ predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Spark HashTable Sink Operator
condition expressions:
0 {key} {val}
- 1 {key} {val}
- 2 {val}
+ 1 {val}
+ 2 {key} {val}
keys:
0 key (type: string)
1 key (type: string)
@@ -105,16 +105,16 @@
Map 4
Map Operator Tree:
TableScan
- alias: b
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ alias: c
+ Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE
Filter Operator
- predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean)
+ predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Spark HashTable Sink Operator
condition expressions:
0 {key} {val}
- 1 {val}
- 2 {key} {val}
+ 1 {key} {val}
+ 2 {val}
keys:
0 key (type: string)
1 key (type: string)
@@ -125,15 +125,15 @@
Stage: Stage-1
Spark
Edges:
- Union 3 <- Map 2 (NONE, 0), Map 7 (NONE, 0)
+ Union 2 <- Map 1 (NONE, 0), Map 7 (NONE, 0)
#### A masked pattern was here ####
Vertices:
- Map 2
+ Map 1
Map Operator Tree:
TableScan
alias: a
Filter Operator
- predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean)
+ predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean)
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -148,19 +148,17 @@ STAGE PLANS:
2 key (type: string)
outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11
input vertices:
- 1 Map 4
- 2 Map 1
+ 1 Map 3
+ 2 Map 4
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Select Operator
- SELECT * : (no compute)
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Local Work:
Map Reduce Local Work
Map 7
@@ -168,7 +166,7 @@ STAGE PLANS:
TableScan
alias: a
Filter Operator
- predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean)
+ predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean)
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -188,18 +186,16 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Select Operator
- SELECT * : (no compute)
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Local Work:
Map Reduce Local Work
- Union 3
- Vertex: Union 3
+ Union 2
+ Vertex: Union 2
Stage: Stage-3
Spark
@@ -211,7 +207,7 @@ STAGE PLANS:
alias: b
Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
Filter Operator
- predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean)
+ predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Spark HashTable Sink Operator
condition expressions:
@@ -230,7 +226,7 @@ STAGE PLANS:
alias: c
Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE
Filter Operator
- predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean)
+ predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Spark HashTable Sink Operator
condition expressions:
diff --git a/ql/src/test/results/clientpositive/spark/skewjoinopt1.q.out b/ql/src/test/results/clientpositive/spark/skewjoinopt1.q.out
index 720b6b8..483f7a9 100644
--- a/ql/src/test/results/clientpositive/spark/skewjoinopt1.q.out
+++ b/ql/src/test/results/clientpositive/spark/skewjoinopt1.q.out
@@ -63,7 +63,7 @@ STAGE PLANS:
alias: b
Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
Filter Operator
- predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean)
+ predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Spark HashTable Sink Operator
condition expressions:
@@ -86,7 +86,7 @@ STAGE PLANS:
TableScan
alias: a
Filter Operator
- predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean)
+ predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean)
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -102,14 +102,12 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Select Operator
- SELECT * : (no compute)
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Local Work:
Map Reduce Local Work
Map 5
@@ -117,7 +115,7 @@ STAGE PLANS:
TableScan
alias: a
Filter Operator
- predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean)
+ predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean)
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -133,14 +131,12 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Select Operator
- SELECT * : (no compute)
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Local Work:
Map Reduce Local Work
Union 2
@@ -156,7 +152,7 @@ STAGE PLANS:
alias: b
Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
Filter Operator
- predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean)
+ predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Spark HashTable Sink Operator
condition expressions:
@@ -219,7 +215,7 @@ STAGE PLANS:
alias: a
Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
Filter Operator
- predicate: (not ((key = '2') or (key = '3'))) (type: boolean)
+ predicate: ((key = '2') or (key = '3')) (type: boolean)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Spark HashTable Sink Operator
condition expressions:
@@ -242,7 +238,7 @@ STAGE PLANS:
TableScan
alias: b
Filter Operator
- predicate: (not ((key = '2') or (key = '3'))) (type: boolean)
+ predicate: ((key = '2') or (key = '3')) (type: boolean)
Map Join Operator
condition map:
Right Outer Join0 to 1
@@ -258,14 +254,12 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Select Operator
- SELECT * : (no compute)
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Local Work:
Map Reduce Local Work
Map 4
@@ -273,7 +267,7 @@ STAGE PLANS:
TableScan
alias: b
Filter Operator
- predicate: ((key = '2') or (key = '3')) (type: boolean)
+ predicate: (not ((key = '2') or (key = '3'))) (type: boolean)
Map Join Operator
condition map:
Right Outer Join0 to 1
@@ -289,14 +283,12 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Select Operator
- SELECT * : (no compute)
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Local Work:
Map Reduce Local Work
Union 3
@@ -312,7 +304,7 @@ STAGE PLANS:
alias: a
Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
Filter Operator
- predicate: ((key = '2') or (key = '3')) (type: boolean)
+ predicate: (not ((key = '2') or (key = '3'))) (type: boolean)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Spark HashTable Sink Operator
condition expressions:
@@ -377,7 +369,7 @@ STAGE PLANS:
alias: b
Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
Filter Operator
- predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean)
+ predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Spark HashTable Sink Operator
condition expressions:
@@ -401,7 +393,7 @@ STAGE PLANS:
TableScan
alias: a
Filter Operator
- predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean)
+ predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean)
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -413,16 +405,13 @@ STAGE PLANS:
1 key (type: string)
input vertices:
1 Map 4
- Select Operator
- Select Operator
- SELECT * : (no compute)
- Group By Operator
- aggregations: count(1)
- mode: hash
- outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- value expressions: _col0 (type: bigint)
+ Group By Operator
+ aggregations: count(1)
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ value expressions: _col0 (type: bigint)
Local Work:
Map Reduce Local Work
Map 6
@@ -430,7 +419,7 @@ STAGE PLANS:
TableScan
alias: a
Filter Operator
- predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean)
+ predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean)
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -442,16 +431,13 @@ STAGE PLANS:
1 key (type: string)
input vertices:
1 Map 5
- Select Operator
- Select Operator
- SELECT * : (no compute)
- Group By Operator
- aggregations: count(1)
- mode: hash
- outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- value expressions: _col0 (type: bigint)
+ Group By Operator
+ aggregations: count(1)
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ value expressions: _col0 (type: bigint)
Local Work:
Map Reduce Local Work
Reducer 3
@@ -485,7 +471,7 @@ STAGE PLANS:
alias: b
Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
Filter Operator
- predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean)
+ predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Spark HashTable Sink Operator
condition expressions:
@@ -537,7 +523,7 @@ STAGE PLANS:
alias: a
Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
Filter Operator
- predicate: (not ((key = '2') or (key = '3'))) (type: boolean)
+ predicate: ((key = '2') or (key = '3')) (type: boolean)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Spark HashTable Sink Operator
condition expressions:
@@ -561,7 +547,7 @@ STAGE PLANS:
TableScan
alias: b
Filter Operator
- predicate: (not ((key = '2') or (key = '3'))) (type: boolean)
+ predicate: ((key = '2') or (key = '3')) (type: boolean)
Map Join Operator
condition map:
Right Outer Join0 to 1
@@ -573,16 +559,13 @@ STAGE PLANS:
1 key (type: string)
input vertices:
0 Map 1
- Select Operator
- Select Operator
- SELECT * : (no compute)
- Group By Operator
- aggregations: count(1)
- mode: hash
- outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- value expressions: _col0 (type: bigint)
+ Group By Operator
+ aggregations: count(1)
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ value expressions: _col0 (type: bigint)
Local Work:
Map Reduce Local Work
Map 5
@@ -590,7 +573,7 @@ STAGE PLANS:
TableScan
alias: b
Filter Operator
- predicate: ((key = '2') or (key = '3')) (type: boolean)
+ predicate: (not ((key = '2') or (key = '3'))) (type: boolean)
Map Join Operator
condition map:
Right Outer Join0 to 1
@@ -602,16 +585,13 @@ STAGE PLANS:
1 key (type: string)
input vertices:
0 Map 6
- Select Operator
- Select Operator
- SELECT * : (no compute)
- Group By Operator
- aggregations: count(1)
- mode: hash
- outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- value expressions: _col0 (type: bigint)
+ Group By Operator
+ aggregations: count(1)
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ value expressions: _col0 (type: bigint)
Local Work:
Map Reduce Local Work
Reducer 4
@@ -645,7 +625,7 @@ STAGE PLANS:
alias: a
Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
Filter Operator
- predicate: ((key = '2') or (key = '3')) (type: boolean)
+ predicate: (not ((key = '2') or (key = '3'))) (type: boolean)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Spark HashTable Sink Operator
condition expressions:
diff --git a/ql/src/test/results/clientpositive/spark/skewjoinopt10.q.out b/ql/src/test/results/clientpositive/spark/skewjoinopt10.q.out
index f43175e..b125f6a 100644
--- a/ql/src/test/results/clientpositive/spark/skewjoinopt10.q.out
+++ b/ql/src/test/results/clientpositive/spark/skewjoinopt10.q.out
@@ -65,7 +65,7 @@ STAGE PLANS:
alias: a
Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
Filter Operator
- predicate: (key is not null and (not (key = '8'))) (type: boolean)
+ predicate: (key is not null and (key = '8')) (type: boolean)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Spark HashTable Sink Operator
condition expressions:
@@ -80,15 +80,15 @@
Stage: Stage-1
Spark
Edges:
- Union 4 <- Map 3 (NONE, 0), Map 5 (NONE, 0)
+ Union 3 <- Map 2 (NONE, 0), Map 4 (NONE, 0)
#### A masked pattern was here ####
Vertices:
- Map 3
+ Map 2
Map Operator Tree:
TableScan
alias: b
Filter Operator
- predicate: (key is not null and (not (key = '8'))) (type: boolean)
+ predicate: (key is not null and (key = '8')) (type: boolean)
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -104,13 +104,27 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string), _col6 (type: array)
outputColumnNames: _col0, _col1
- Select Operator
- SELECT * : (no compute)
- Lateral View Forward
- Select Operator
- SELECT * : (no compute)
- expressions: _col0 (type: string), _col1 (type: array)
- outputColumnNames: org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc, org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc
+ Lateral View Forward
+ Select Operator
+ SELECT * : (no compute)
+ expressions: _col0 (type: string), _col1 (type: array)
+ outputColumnNames: org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc, org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc
+ Lateral View Join Operator
+ outputColumnNames: _col0, _col1, _col2
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: array), _col2 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Select Operator
+ expressions: _col1 (type: array)
+ outputColumnNames: _col0
+ UDTF Operator
+ function name: explode
Lateral View Join Operator
outputColumnNames: _col0, _col1, _col2
Select Operator
@@ -122,30 +136,14 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Select Operator
- expressions: _col1 (type: array)
- outputColumnNames: _col0
- UDTF Operator
- function name: explode
- Lateral View Join Operator
- outputColumnNames: _col0, _col1, _col2
- Select Operator
- expressions: _col0 (type: string), _col1 (type: array), _col2 (type: string)
- outputColumnNames: _col0, _col1, _col2
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Local Work:
Map Reduce Local Work
- Map 5
+ Map 4
Map Operator Tree:
TableScan
alias: b
Filter Operator
- predicate: (key is not null and (key = '8')) (type: boolean)
+ predicate: (key is not null and (not (key = '8'))) (type: boolean)
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -157,17 +155,31 @@ STAGE PLANS:
1 key (type: string)
outputColumnNames: _col0, _col6
input vertices:
- 0 Map 2
+ 0 Map 5
Select Operator
expressions: _col0 (type: string), _col6 (type: array)
outputColumnNames: _col0, _col1
- Select Operator
- SELECT * : (no compute)
- Lateral View Forward
- Select Operator
- SELECT * : (no compute)
- expressions: _col0 (type: string), _col1 (type: array)
- outputColumnNames: org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc, org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc
+ Lateral View Forward
+ Select Operator
+ SELECT * : (no compute)
+ expressions: _col0 (type: string), _col1 (type: array)
+ outputColumnNames: org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc, org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc
+ Lateral View Join Operator
+ outputColumnNames: _col0, _col1, _col2
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: array), _col2 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Select Operator
+ expressions: _col1 (type: array)
+ outputColumnNames: _col0
+ UDTF Operator
+ function name: explode
Lateral View Join Operator
outputColumnNames: _col0, _col1, _col2
Select Operator
@@ -179,38 +191,22 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Select Operator
- expressions: _col1 (type: array)
- outputColumnNames: _col0
- UDTF Operator
- function name: explode
- Lateral View Join Operator
- outputColumnNames: _col0, _col1, _col2
- Select Operator
- expressions: _col0 (type: string), _col1 (type: array), _col2 (type: string)
- outputColumnNames: _col0, _col1, _col2
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Local Work:
Map Reduce Local Work
- Union 4
- Vertex: Union 4
+ Union 3
+ Vertex: Union 3
Stage: Stage-3
Spark
#### A masked pattern was here ####
Vertices:
- Map 2
+ Map 5
Map Operator Tree:
TableScan
alias: a
Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
Filter Operator
- predicate: (key is not null and (key = '8')) (type: boolean)
+ predicate: (key is not null and (not (key = '8'))) (type: boolean)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Spark HashTable Sink Operator
condition expressions:
diff --git a/ql/src/test/results/clientpositive/spark/skewjoinopt11.q.out b/ql/src/test/results/clientpositive/spark/skewjoinopt11.q.out
index 70bbf00..83fbe47 100644
--- a/ql/src/test/results/clientpositive/spark/skewjoinopt11.q.out
+++ b/ql/src/test/results/clientpositive/spark/skewjoinopt11.q.out
@@ -77,7 +77,7 @@ STAGE PLANS:
alias: b
Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
Filter Operator
- predicate: (key is not null and (not (key = '2'))) (type: boolean)
+ predicate: (key is not null and (key = '2')) (type: boolean)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Spark HashTable Sink Operator
condition expressions:
@@ -92,7 +92,7 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Union 2 <- Map 1 (NONE, 0), Map 4 (NONE, 0), Map 6 (NONE, 0), Map 9 (NONE, 0)
+ Union 2 <- Map 1 (NONE, 0), Map 4 (NONE, 0), Map 7 (NONE, 0), Map 9 (NONE, 0)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -100,7 +100,7 @@ STAGE PLANS:
TableScan
alias: a
Filter Operator
- predicate: (key is not null and (not (key = '2'))) (type: boolean)
+ predicate: (key is not null and (key = '2')) (type: boolean)
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -117,16 +117,14 @@ STAGE PLANS:
expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string)
outputColumnNames: _col0, _col1, _col2
Select Operator
- SELECT * : (no compute)
- Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
- outputColumnNames: _col0, _col1, _col2
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Local Work:
Map Reduce Local Work
Map 4
@@ -151,24 +149,22 @@ STAGE PLANS:
expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string)
outputColumnNames: _col0, _col1, _col2
Select Operator
- SELECT * : (no compute)
- Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
- outputColumnNames: _col0, _col1, _col2
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Local Work:
Map Reduce Local Work
- Map 6
+ Map 7
Map Operator Tree:
TableScan
alias: a
Filter Operator
- predicate: (key is not null and (key = '2')) (type: boolean)
+ predicate: (key is not null and (not (key = '2'))) (type: boolean)
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -180,21 +176,19 @@ STAGE PLANS:
1 key (type: string)
outputColumnNames: _col0, _col1, _col6
input vertices:
- 1 Map 8
+ 1 Map 6
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string)
outputColumnNames: _col0, _col1, _col2
Select Operator
- SELECT * : (no compute)
- Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
- outputColumnNames: _col0, _col1, _col2
- File Output Operator
- compressed: false
- table:
- input format: ...
- output format: ...
- serde: ...
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Local Work:
Map Reduce Local Work
Map 9
@@ -214,21 +208,19 @@ STAGE PLANS:
1 key (type: string)
outputColumnNames: _col0, _col1, _col6
input vertices:
- 1 Map 7
+ 1 Map 8
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string)
outputColumnNames: _col0, _col1, _col2
Select Operator
- SELECT * : (no compute)
- Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
- outputColumnNames: _col0, _col1, _col2
- File Output Operator
- compressed: false
- table:
- input format: ...
- output format: ...
- serde: ...
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Local Work:
Map Reduce Local Work
Union 2
@@ -260,13 +252,13 @@ STAGE PLANS:
Spark
#### A masked pattern was here ####
Vertices:
- Map 8
+ Map 6
Map Operator Tree:
TableScan
alias: b
Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
Filter Operator
- predicate: (key is not null and (key = '2')) (type: boolean)
+ predicate: (key is not null and (not (key = '2'))) (type: boolean)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Spark HashTable Sink Operator
condition expressions:
@@ -282,7 +274,7 @@ STAGE PLANS:
Spark
#### A masked pattern was here ####
Vertices:
- Map 7
+ Map 8
Map Operator Tree:
TableScan
alias: b
diff --git a/ql/src/test/results/clientpositive/spark/skewjoinopt12.q.out b/ql/src/test/results/clientpositive/spark/skewjoinopt12.q.out
index 159512c..037170a 100644
--- a/ql/src/test/results/clientpositive/spark/skewjoinopt12.q.out
+++ b/ql/src/test/results/clientpositive/spark/skewjoinopt12.q.out
@@ -65,7 +65,7 @@ STAGE PLANS:
alias: b
Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
Filter Operator
- predicate: ((key is not null and val is not null) and (not ((((key = '2') and (val = '12')) or ((key = '8') and (val = '18'))) or ((key = '3') and (val = '13'))))) (type: boolean)
+ predicate: ((key is not null and val is not null) and ((((key = '2') and (val = '12')) or ((key = '8') and (val = '18'))) or ((key = '3') and (val = '13')))) (type: boolean)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Spark HashTable Sink Operator
condition expressions:
@@ -88,7 +88,7 @@ STAGE PLANS:
TableScan
alias: a
Filter Operator
- predicate: ((key is not null and val is not null) and (not ((((key = '2') and (val = '12')) or ((key = '8') and (val = '18'))) or ((key = '3') and (val = '13'))))) (type: boolean)
+ predicate: ((key is not null and val is not null) and ((((key = '2') and (val = '12')) or ((key = '8') and (val = '18'))) or ((key = '3') and (val = '13')))) (type: boolean)
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -104,14 +104,12 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Select Operator
- SELECT * : (no compute)
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Local Work:
Map Reduce Local Work
Map 5
@@ -119,7 +117,7 @@ STAGE PLANS:
TableScan
alias: a
Filter Operator
- predicate: ((key is not null and val is not null) and ((((key = '2') and (val = '12')) or ((key = '8') and (val = '18'))) or ((key = '3') and (val = '13')))) (type: boolean)
+ predicate: ((key is not null and val is not null) and (not ((((key = '2') and (val = '12')) or ((key = '8') and (val = '18'))) or ((key = '3') and (val = '13'))))) (type: boolean)
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -135,14 +133,12 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Select Operator
- SELECT * : (no compute)
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Local Work:
Map Reduce Local Work
Union 2
@@ -158,7 +154,7 @@ STAGE PLANS:
alias: b
Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
Filter Operator
- predicate: ((key is not null and val is not null) and ((((key = '2') and (val = '12')) or ((key = '8') and (val = '18'))) or ((key = '3') and (val = '13')))) (type: boolean)
+ predicate: ((key is not null and val is not null) and (not ((((key = '2') and (val = '12')) or ((key = '8') and (val = '18'))) or ((key = '3') and (val = '13'))))) (type: boolean)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Spark HashTable Sink Operator
condition expressions:
diff --git a/ql/src/test/results/clientpositive/spark/skewjoinopt13.q.out b/ql/src/test/results/clientpositive/spark/skewjoinopt13.q.out
index aafd608..8e0ab17 100644
--- a/ql/src/test/results/clientpositive/spark/skewjoinopt13.q.out
+++ b/ql/src/test/results/clientpositive/spark/skewjoinopt13.q.out
@@ -84,7 +84,7 @@ STAGE PLANS:
Spark
#### A masked pattern was here ####
Vertices:
- Map 1
+ Map 2
Map Operator Tree:
TableScan
alias: b
@@ -101,7 +101,7 @@ STAGE PLANS:
1 key (type: string)
Local Work:
Map Reduce Local Work
- Map 2
+ Map 3
Map Operator Tree:
TableScan
alias: c
@@ -123,7 +123,7 @@ STAGE PLANS:
Spark
#### A masked pattern was here ####
Vertices:
- Map 3
+ Map 1
Map Operator Tree:
TableScan
alias: a
@@ -142,7 +142,7 @@ STAGE PLANS:
1 key (type: string)
outputColumnNames: _col0, _col1, _col5, _col6
input vertices:
- 1 Map 1
+ 1 Map 2
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Map Join Operator
condition map:
@@ -155,7 +155,7 @@ STAGE PLANS:
1 val (type: string)
outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11
input vertices:
- 1 Map 2
+ 1 Map 3
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string)
diff --git a/ql/src/test/results/clientpositive/spark/skewjoinopt14.q.out b/ql/src/test/results/clientpositive/spark/skewjoinopt14.q.out
index 400c72f..6429622 100644
--- a/ql/src/test/results/clientpositive/spark/skewjoinopt14.q.out
+++ b/ql/src/test/results/clientpositive/spark/skewjoinopt14.q.out
@@ -95,7 +95,7 @@ STAGE PLANS:
alias: b
Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
Filter Operator
- predicate: (key is not null and (not (key = '2'))) (type: boolean)
+ predicate: (key is not null and (key = '2')) (type: boolean)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Spark HashTable Sink Operator
condition expressions:
@@ -110,7 +110,7 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 1), Union 2 (PARTITION-LEVEL SORT, 1)
+ Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Union 2 (PARTITION-LEVEL SORT, 1)
Union 2 <- Map 1 (NONE, 0), Map 7 (NONE, 0)
#### A masked pattern was here ####
Vertices:
@@ -119,7 +119,7 @@ STAGE PLANS:
TableScan
alias: a
Filter Operator
- predicate: ((key is not null and val is not null) and (not (key = '2'))) (type: boolean)
+ predicate: ((key is not null and val is not null) and (key = '2')) (type: boolean)
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -132,16 +132,14 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col5, _col6
input vertices:
1 Map 4
- Select Operator
- SELECT * : (no compute)
- Reduce Output Operator
- key expressions: _col1 (type: string)
- sort order: +
- Map-reduce partition columns: _col1 (type: string)
- value expressions: _col0 (type: string), _col5 (type: string), _col6 (type: string)
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ value expressions: _col0 (type: string), _col5 (type: string), _col6 (type: string)
Local Work:
Map Reduce Local Work
- Map 6
+ Map 5
Map Operator Tree:
TableScan
alias: c
@@ -160,7 +158,7 @@ STAGE PLANS:
TableScan
alias: a
Filter Operator
- predicate: ((key is not null and val is not null) and (key = '2')) (type: boolean)
+ predicate: ((key is not null and val is not null) and (not (key = '2'))) (type: boolean)
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -172,14 +170,12 @@ STAGE PLANS:
1 key (type: string)
outputColumnNames: _col0, _col1, _col5, _col6
input vertices:
- 1 Map 5
- Select Operator
- SELECT * : (no compute)
- Reduce Output Operator
- key expressions: _col1 (type: string)
- sort order: +
- Map-reduce partition columns: _col1 (type: string)
- value expressions: _col0 (type: string), _col5 (type: string), _col6 (type: string)
+ 1 Map 6
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ value expressions: _col0 (type: string), _col5 (type: string), _col6 (type: string)
Local Work:
Map Reduce Local Work
Reducer 3
@@ -210,13 +206,13 @@ STAGE PLANS:
Spark
#### A masked pattern was here ####
Vertices:
- Map 5
+ Map 6
Map Operator Tree:
TableScan
alias: b
Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
Filter Operator
- predicate: (key is not null and (key = '2')) (type: boolean)
+ predicate: (key is not null and (not (key = '2'))) (type: boolean)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Spark HashTable Sink Operator
condition expressions:
diff --git a/ql/src/test/results/clientpositive/spark/skewjoinopt15.q.out b/ql/src/test/results/clientpositive/spark/skewjoinopt15.q.out
index faea556..a35ebc1 100644
--- a/ql/src/test/results/clientpositive/spark/skewjoinopt15.q.out
+++ b/ql/src/test/results/clientpositive/spark/skewjoinopt15.q.out
@@ -103,8 +103,8 @@ STAGE PLANS:
alias: b
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (key is not null and (not ((key = 2) or (key = 3)))) (type: boolean)
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ predicate: (key is not null and ((key = 2) or (key = 3))) (type: boolean)
+ Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
condition expressions:
0 {key} {val}
@@ -126,7 +126,7 @@ STAGE PLANS:
TableScan
alias: a
Filter Operator
- predicate: (key is not null and (not ((key = 2) or (key = 3)))) (type: boolean)
+ predicate: (key is not null and ((key = 2) or (key = 3))) (type: boolean)
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -142,14 +142,12 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Select Operator
- SELECT * : (no compute)
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Local Work:
Map Reduce Local Work
Map 5
@@ -157,7 +155,7 @@ STAGE PLANS:
TableScan
alias: a
Filter Operator
- predicate: (key is not null and ((key = 2) or (key = 3))) (type: boolean)
+ predicate: (key is not null and (not ((key = 2) or (key = 3)))) (type: boolean)
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -173,14 +171,12 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Select Operator
- SELECT * : (no compute)
- File Output Operator
- compressed: false
- table:
- input format: ...
- output format: ...
- serde: ...
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Local Work:
Map Reduce Local Work
Union 2
@@ -196,8 +192,8 @@ STAGE PLANS:
alias: b
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (key is not null and ((key = 2) or (key = 3))) (type: boolean)
- Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ predicate: (key is not null and (not ((key = 2) or (key = 3)))) (type: boolean)
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
condition expressions:
0 {key} {val}
@@ -259,8 +255,8 @@ STAGE PLANS:
alias: a
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (not ((key = 2) or (key = 3))) (type: boolean)
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ predicate: ((key = 2) or (key = 3)) (type: boolean)
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
condition expressions:
0 {val}
@@ -282,7 +278,7 @@ STAGE PLANS:
TableScan
alias: b
Filter Operator
- predicate: (not ((key = 2) or (key = 3))) (type: boolean)
+ predicate: ((key = 2) or (key = 3)) (type: boolean)
Map Join Operator
condition map:
Right Outer Join0 to 1
@@ -298,14 +294,12 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Select Operator
- SELECT * : (no compute)
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Local Work:
Map Reduce Local Work
Map 4
@@ -313,7 +307,7 @@ STAGE PLANS:
TableScan
alias: b
Filter Operator
- predicate: ((key = 2) or (key = 3)) (type: boolean)
+ predicate: (not ((key = 2) or (key = 3))) (type: boolean)
Map Join Operator
condition map:
Right Outer Join0 to 1
@@ -329,14 +323,12 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Select Operator
- SELECT * : (no compute)
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Local Work:
Map Reduce Local Work
Union 3
@@ -352,8 +344,8 @@ STAGE PLANS:
alias: a
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((key = 2) or (key = 3)) (type: boolean)
- Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ predicate: (not ((key = 2) or (key = 3))) (type: boolean)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Spark HashTable Sink Operator
condition expressions:
0 {val}
@@ -417,8 +409,8 @@ STAGE PLANS:
alias: b
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (key is not null and (not ((key = 2) or (key = 3)))) (type: boolean)
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ predicate: (key is not null and ((key = 2) or (key = 3))) (type: boolean)
+ Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
condition expressions:
0
@@ -441,7 +433,7 @@ STAGE PLANS:
TableScan
alias: a
Filter Operator
- predicate: (key is not null and (not ((key = 2) or (key = 3)))) (type: boolean)
+ predicate: (key is not null and ((key = 2) or (key = 3))) (type: boolean)
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -453,16 +445,13 @@ STAGE PLANS:
1 key (type: int)
input vertices:
1 Map 4
- Select Operator
- Select Operator
- SELECT * : (no compute)
- Group By Operator
- aggregations: count(1)
- mode: hash
- outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- value expressions: _col0 (type: bigint)
+ Group By Operator
+ aggregations: count(1)
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ value expressions: _col0 (type: bigint)
Local Work:
Map Reduce Local Work
Map 6
@@ -470,7 +459,7 @@ STAGE PLANS:
TableScan
alias: a
Filter Operator
- predicate: (key is not null and ((key = 2) or (key = 3))) (type: boolean)
+ predicate: (key is not null and (not ((key = 2) or (key = 3)))) (type: boolean)
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -482,16 +471,13 @@ STAGE PLANS:
1 key (type: int)
input vertices:
1 Map 5
- Select Operator
- Select Operator
- SELECT * : (no compute)
- Group By Operator
- aggregations: count(1)
- mode: hash
- outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- value expressions: _col0 (type: bigint)
+ Group By Operator
+ aggregations: count(1)
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ value expressions: _col0 (type: bigint)
Local Work:
Map Reduce Local Work
Reducer 3
@@ -525,8 +511,8 @@ STAGE PLANS:
alias: b
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (key is not null and ((key = 2) or (key = 3))) (type: boolean)
- Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ predicate: (key is not null and (not ((key = 2) or (key = 3)))) (type: boolean)
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
condition expressions:
0
@@ -577,8 +563,8 @@ STAGE PLANS:
alias: a
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (not ((key = 2) or (key = 3))) (type: boolean)
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ predicate: ((key = 2) or (key = 3)) (type: boolean)
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
condition expressions:
0
@@ -601,7 +587,7 @@ STAGE PLANS:
TableScan
alias: b
Filter Operator
- predicate: (not ((key = 2) or (key = 3))) (type: boolean)
+ predicate: ((key = 2) or (key = 3)) (type: boolean)
Map Join Operator
condition map:
Right Outer Join0 to 1
@@ -613,16 +599,13 @@ STAGE PLANS:
1 key (type: int)
input vertices:
0 Map 1
- Select Operator
- Select Operator
- SELECT * : (no compute)
- Group By Operator
- aggregations: count(1)
- mode: hash
- outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- value expressions: _col0 (type: bigint)
+ Group By Operator
+ aggregations: count(1)
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ value expressions: _col0 (type: bigint)
Local Work:
Map Reduce Local Work
Map 5
@@ -630,7 +613,7 @@ STAGE PLANS:
TableScan
alias: b
Filter Operator
- predicate: ((key = 2) or (key = 3)) (type: boolean)
+ predicate: (not ((key = 2) or (key = 3))) (type: boolean)
Map Join Operator
condition map: Right Outer Join0 to 1 @@ -642,16 +625,13 @@ STAGE PLANS: 1 key (type: int) input vertices: 0 Map 6 - Select Operator - Select Operator - SELECT * : (no compute) - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reducer 4 @@ -685,8 +665,8 @@ STAGE PLANS: alias: a Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key = 2) or (key = 3)) (type: boolean) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + predicate: (not ((key = 2) or (key = 3))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: 0 diff --git a/ql/src/test/results/clientpositive/spark/skewjoinopt16.q.out b/ql/src/test/results/clientpositive/spark/skewjoinopt16.q.out index aaab975..1e96de0 100644 --- a/ql/src/test/results/clientpositive/spark/skewjoinopt16.q.out +++ b/ql/src/test/results/clientpositive/spark/skewjoinopt16.q.out @@ -65,7 +65,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((key is not null and val is not null) and (not (((key = '2') and (val = '12')) or (key = '3')))) (type: boolean) + predicate: ((key is not null and val is not null) and (((key = '2') and (val = '12')) or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: @@ -88,7 +88,7 @@ STAGE PLANS: TableScan alias: a Filter Operator - predicate: ((key is not null and val is not null) and (not (((key = '2') and (val = '12')) or (key = '3')))) (type: boolean) + predicate: ((key is not null and val is not null) and (((key = '2') and (val = '12')) or (key = '3'))) (type: boolean) Map Join Operator condition map: Inner Join 0 to 1 @@ -104,14 +104,12 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work Map 5 @@ -119,7 +117,7 @@ STAGE PLANS: TableScan alias: a Filter Operator - predicate: ((key is not null and val is not null) and (((key = '2') and (val = '12')) or (key = '3'))) (type: boolean) + predicate: ((key is not null and val is not null) and (not (((key = '2') and (val = '12')) or (key = '3')))) (type: boolean) Map Join Operator condition map: Inner Join 0 to 1 @@ -135,14 +133,12 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * 
: (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work Union 2 @@ -158,7 +154,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((key is not null and val is not null) and (((key = '2') and (val = '12')) or (key = '3'))) (type: boolean) + predicate: ((key is not null and val is not null) and (not (((key = '2') and (val = '12')) or (key = '3')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: diff --git a/ql/src/test/results/clientpositive/spark/skewjoinopt17.q.out b/ql/src/test/results/clientpositive/spark/skewjoinopt17.q.out index 40160cc..7d14ac1 100644 --- a/ql/src/test/results/clientpositive/spark/skewjoinopt17.q.out +++ b/ql/src/test/results/clientpositive/spark/skewjoinopt17.q.out @@ -69,7 +69,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (not (key = '2'))) (type: boolean) + predicate: (key is not null and (key = '2')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: @@ -92,7 +92,7 @@ STAGE PLANS: TableScan alias: a Filter Operator - predicate: (key is not null and (not (key = '2'))) (type: boolean) + predicate: (key is not null and (key = '2')) (type: boolean) Map Join Operator condition map: Inner Join 0 to 1 @@ -108,14 +108,12 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work Map 5 @@ -123,7 +121,7 @@ STAGE PLANS: TableScan alias: a Filter Operator - predicate: (key is not null and (key = '2')) (type: boolean) + predicate: (key is not null and (not (key = '2'))) (type: boolean) Map Join Operator condition map: Inner Join 0 to 1 @@ -139,14 +137,12 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + 
table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work Union 2 @@ -162,7 +158,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (key = '2')) (type: boolean) + predicate: (key is not null and (not (key = '2'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: @@ -281,7 +277,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((key is not null and val is not null) and (not (((key = '2') and (val = '12')) or (key = '2')))) (type: boolean) + predicate: ((key is not null and val is not null) and (((key = '2') and (val = '12')) or (key = '2'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: @@ -304,7 +300,7 @@ STAGE PLANS: TableScan alias: a Filter Operator - predicate: ((key is not null and val is not null) and (not (((key = '2') and (val = '12')) or (key = '2')))) (type: boolean) + predicate: ((key is not null and val is not null) and (((key = '2') and (val = '12')) or (key = '2'))) (type: boolean) Map Join Operator condition map: Inner Join 0 to 1 @@ -320,14 +316,12 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work Map 5 @@ -335,7 +329,7 @@ STAGE PLANS: TableScan alias: a Filter Operator - predicate: ((key is not null and val is not null) and (((key = '2') and (val = '12')) or (key = '2'))) (type: boolean) + predicate: ((key is not null and val is not null) and (not (((key = '2') and (val = '12')) or (key = '2')))) (type: boolean) Map Join Operator condition map: Inner Join 0 to 1 @@ -351,14 +345,12 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work Union 2 @@ -374,7 +366,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter 
Operator - predicate: ((key is not null and val is not null) and (((key = '2') and (val = '12')) or (key = '2'))) (type: boolean) + predicate: ((key is not null and val is not null) and (not (((key = '2') and (val = '12')) or (key = '2')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: diff --git a/ql/src/test/results/clientpositive/spark/skewjoinopt18.q.out b/ql/src/test/results/clientpositive/spark/skewjoinopt18.q.out index 24b9591..2ee4ed7 100644 --- a/ql/src/test/results/clientpositive/spark/skewjoinopt18.q.out +++ b/ql/src/test/results/clientpositive/spark/skewjoinopt18.q.out @@ -84,7 +84,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -106,7 +106,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -125,7 +125,7 @@ STAGE PLANS: 1 UDFToDouble(key) (type: double) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: string), _col6 (type: string) diff --git a/ql/src/test/results/clientpositive/spark/skewjoinopt19.q.out b/ql/src/test/results/clientpositive/spark/skewjoinopt19.q.out index 223dcfd..3b347ef 100644 --- a/ql/src/test/results/clientpositive/spark/skewjoinopt19.q.out +++ b/ql/src/test/results/clientpositive/spark/skewjoinopt19.q.out @@ -67,7 +67,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (not (key = '2'))) (type: boolean) + predicate: (key is not null and (key = '2')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: @@ -90,7 +90,7 @@ STAGE PLANS: TableScan alias: a Filter Operator - predicate: (key is not null and (not (key = '2'))) (type: boolean) + predicate: (key is not null and (key = '2')) (type: boolean) Map Join Operator condition map: Inner Join 0 to 1 @@ -106,14 +106,12 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work Map 5 @@ -121,7 +119,7 @@ STAGE PLANS: TableScan alias: a Filter Operator - predicate: (key is not null and (key = '2')) (type: boolean) + predicate: (key is not null and (not (key = '2'))) (type: boolean) Map Join Operator condition map: Inner Join 0 to 1 @@ -137,14 +135,12 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - 
table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work Union 2 @@ -160,7 +156,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (key = '2')) (type: boolean) + predicate: (key is not null and (not (key = '2'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: diff --git a/ql/src/test/results/clientpositive/spark/skewjoinopt2.q.out b/ql/src/test/results/clientpositive/spark/skewjoinopt2.q.out index b0f0706..dde0fe4 100644 --- a/ql/src/test/results/clientpositive/spark/skewjoinopt2.q.out +++ b/ql/src/test/results/clientpositive/spark/skewjoinopt2.q.out @@ -73,7 +73,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((key is not null and val is not null) and (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')))) (type: boolean) + predicate: ((key is not null and val is not null) and ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: @@ -96,7 +96,7 @@ STAGE PLANS: TableScan alias: a Filter Operator - predicate: ((key is not null and val is not null) and (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')))) (type: boolean) + predicate: ((key is not null and val is not null) and ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) Map Join Operator condition map: Inner Join 0 to 1 @@ -112,14 +112,12 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work Map 5 @@ -127,7 +125,7 @@ STAGE PLANS: TableScan alias: a Filter Operator - predicate: ((key is not null and val is not null) and ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) + predicate: ((key is not null and val is not null) and (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')))) (type: boolean) Map Join Operator condition map: Inner Join 0 to 1 @@ -143,14 +141,12 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - 
compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work Union 2 @@ -166,7 +162,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((key is not null and val is not null) and ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) + predicate: ((key is not null and val is not null) and (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: @@ -226,7 +222,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) + predicate: ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: @@ -249,7 +245,7 @@ STAGE PLANS: TableScan alias: a Filter Operator - predicate: (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) + predicate: ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')) (type: boolean) Map Join Operator condition map: Left Outer Join0 to 1 @@ -265,14 +261,12 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work Map 5 @@ -280,7 +274,7 @@ STAGE PLANS: TableScan alias: a Filter Operator - predicate: ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')) (type: boolean) + predicate: (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) Map Join Operator condition map: Left Outer Join0 to 1 @@ -296,14 +290,12 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work Union 2 @@ -319,7 +311,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')) (type: boolean) + predicate: (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: @@ -383,7 +375,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((key is not null and val is not null) and (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')))) (type: boolean) + predicate: ((key is not null and val is not null) and ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: @@ -407,7 +399,7 @@ STAGE PLANS: TableScan alias: a Filter Operator - predicate: ((key is not null and val is not null) and (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')))) (type: boolean) + predicate: ((key is not null and val is not null) and ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) Map Join Operator condition map: Inner Join 0 to 1 @@ -420,21 +412,16 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 1 Map 4 - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Select Operator - SELECT * : (no compute) - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - value expressions: _col1 (type: bigint) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: bigint) Local Work: Map Reduce Local Work Map 6 @@ -442,7 +429,7 @@ STAGE PLANS: TableScan alias: a Filter Operator - predicate: ((key is not null and val is not null) and ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) + predicate: ((key is not null and val is not null) and (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')))) (type: boolean) Map Join Operator condition map: Inner Join 0 to 1 @@ -455,21 +442,16 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 1 Map 5 - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Select Operator - SELECT * : (no compute) - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - value expressions: _col1 (type: bigint) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + 
value expressions: _col1 (type: bigint) Local Work: Map Reduce Local Work Reducer 3 @@ -504,7 +486,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((key is not null and val is not null) and ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) + predicate: ((key is not null and val is not null) and (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: @@ -557,7 +539,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) + predicate: ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: @@ -581,7 +563,7 @@ STAGE PLANS: TableScan alias: a Filter Operator - predicate: (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) + predicate: ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')) (type: boolean) Map Join Operator condition map: Left Outer Join0 to 1 @@ -594,21 +576,16 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 1 Map 4 - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Select Operator - SELECT * : (no compute) - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - value expressions: _col1 (type: bigint) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: bigint) Local Work: Map Reduce Local Work Map 6 @@ -616,7 +593,7 @@ STAGE PLANS: TableScan alias: a Filter Operator - predicate: ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')) (type: boolean) + predicate: (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) Map Join Operator condition map: Left Outer Join0 to 1 @@ -629,21 +606,16 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 1 Map 5 - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Select Operator - SELECT * : (no compute) - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - value expressions: _col1 (type: bigint) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: bigint) Local Work: Map Reduce Local Work Reducer 3 @@ -678,7 +650,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((((key = 
'2') or (key = '7')) or (key = '3')) or (key = '8')) (type: boolean) + predicate: (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: diff --git a/ql/src/test/results/clientpositive/spark/skewjoinopt20.q.out b/ql/src/test/results/clientpositive/spark/skewjoinopt20.q.out index f783649..49d9e99 100644 --- a/ql/src/test/results/clientpositive/spark/skewjoinopt20.q.out +++ b/ql/src/test/results/clientpositive/spark/skewjoinopt20.q.out @@ -67,7 +67,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (not (key = '2'))) (type: boolean) + predicate: (key is not null and (key = '2')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: @@ -90,7 +90,7 @@ STAGE PLANS: TableScan alias: a Filter Operator - predicate: (key is not null and (not (key = '2'))) (type: boolean) + predicate: (key is not null and (key = '2')) (type: boolean) Map Join Operator condition map: Inner Join 0 to 1 @@ -106,14 +106,12 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work Map 5 @@ -121,7 +119,7 @@ STAGE PLANS: TableScan alias: a Filter Operator - predicate: (key is not null and (key = '2')) (type: boolean) + predicate: (key is not null and (not (key = '2'))) (type: boolean) Map Join Operator condition map: Inner Join 0 to 1 @@ -137,14 +135,12 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work Union 2 @@ -160,7 +156,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (key = '2')) (type: boolean) + predicate: (key is not null and (not (key = '2'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: diff --git a/ql/src/test/results/clientpositive/spark/skewjoinopt3.q.out 
b/ql/src/test/results/clientpositive/spark/skewjoinopt3.q.out index c35400a..2575359 100644 --- a/ql/src/test/results/clientpositive/spark/skewjoinopt3.q.out +++ b/ql/src/test/results/clientpositive/spark/skewjoinopt3.q.out @@ -67,7 +67,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) + predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: @@ -90,7 +90,7 @@ STAGE PLANS: TableScan alias: a Filter Operator - predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) + predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) Map Join Operator condition map: Inner Join 0 to 1 @@ -106,14 +106,12 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work Map 5 @@ -121,7 +119,7 @@ STAGE PLANS: TableScan alias: a Filter Operator - predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) + predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) Map Join Operator condition map: Inner Join 0 to 1 @@ -137,14 +135,12 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work Union 2 @@ -160,7 +156,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) + predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: @@ -225,7 +221,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (not (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) + 
predicate: (((key = '2') or (key = '8')) or (key = '3')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -239,7 +235,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (not (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) + predicate: (((key = '2') or (key = '8')) or (key = '3')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -253,7 +249,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (((key = '2') or (key = '8')) or (key = '3')) (type: boolean) + predicate: (not (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -267,7 +263,7 @@ STAGE PLANS: alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (((key = '2') or (key = '8')) or (key = '3')) (type: boolean) + predicate: (not (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: string) @@ -287,14 +283,12 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Reduce Operator Tree: Join Operator @@ -307,14 +301,12 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/spark/skewjoinopt4.q.out b/ql/src/test/results/clientpositive/spark/skewjoinopt4.q.out index 665a39f..801129c 100644 --- a/ql/src/test/results/clientpositive/spark/skewjoinopt4.q.out +++ b/ql/src/test/results/clientpositive/spark/skewjoinopt4.q.out @@ -63,7 +63,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (not (key = '2'))) (type: boolean) + predicate: (key is not null and (key = 
'2')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: @@ -86,7 +86,7 @@ STAGE PLANS: TableScan alias: a Filter Operator - predicate: (key is not null and (not (key = '2'))) (type: boolean) + predicate: (key is not null and (key = '2')) (type: boolean) Map Join Operator condition map: Inner Join 0 to 1 @@ -102,14 +102,12 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work Map 5 @@ -117,7 +115,7 @@ STAGE PLANS: TableScan alias: a Filter Operator - predicate: (key is not null and (key = '2')) (type: boolean) + predicate: (key is not null and (not (key = '2'))) (type: boolean) Map Join Operator condition map: Inner Join 0 to 1 @@ -133,14 +131,12 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work Union 2 @@ -156,7 +152,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (key = '2')) (type: boolean) + predicate: (key is not null and (not (key = '2'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: @@ -217,7 +213,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (not (key = '2'))) (type: boolean) + predicate: (key is not null and (key = '2')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: @@ -240,7 +236,7 @@ STAGE PLANS: TableScan alias: a Filter Operator - predicate: (key is not null and (not (key = '2'))) (type: boolean) + predicate: (key is not null and (key = '2')) (type: boolean) Map Join Operator condition map: Inner Join 0 to 1 @@ -256,14 +252,12 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: 
org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work Map 5 @@ -271,7 +265,7 @@ STAGE PLANS: TableScan alias: a Filter Operator - predicate: (key is not null and (key = '2')) (type: boolean) + predicate: (key is not null and (not (key = '2'))) (type: boolean) Map Join Operator condition map: Inner Join 0 to 1 @@ -287,14 +281,12 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work Union 2 @@ -310,7 +302,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (key = '2')) (type: boolean) + predicate: (key is not null and (not (key = '2'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: diff --git a/ql/src/test/results/clientpositive/spark/skewjoinopt5.q.out b/ql/src/test/results/clientpositive/spark/skewjoinopt5.q.out index 0a63d7f..6bf9984 100644 --- a/ql/src/test/results/clientpositive/spark/skewjoinopt5.q.out +++ b/ql/src/test/results/clientpositive/spark/skewjoinopt5.q.out @@ -65,7 +65,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean) + predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: @@ -88,7 +88,7 @@ STAGE PLANS: TableScan alias: a Filter Operator - predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean) + predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean) Map Join Operator condition map: Inner Join 0 to 1 @@ -104,14 +104,12 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work Map 5 @@ -119,7 +117,7 @@ STAGE PLANS: TableScan alias: a Filter Operator - predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean) + predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean) Map Join Operator condition map: Inner Join 0 to 1 @@ -135,14 +133,12 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work Union 2 @@ -158,7 +154,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean) + predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: diff --git a/ql/src/test/results/clientpositive/spark/skewjoinopt6.q.out b/ql/src/test/results/clientpositive/spark/skewjoinopt6.q.out index f492cb2..b6052f4 100644 --- a/ql/src/test/results/clientpositive/spark/skewjoinopt6.q.out +++ b/ql/src/test/results/clientpositive/spark/skewjoinopt6.q.out @@ -67,7 +67,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) + predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: @@ -90,7 +90,7 @@ STAGE PLANS: TableScan alias: a Filter Operator - predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) + predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) Map Join Operator condition map: Inner Join 0 to 1 @@ -106,14 +106,12 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work Map 5 @@ -121,7 +119,7 @@ STAGE PLANS: TableScan alias: a Filter 
Operator - predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) + predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) Map Join Operator condition map: Inner Join 0 to 1 @@ -137,14 +135,12 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work Union 2 @@ -160,7 +156,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) + predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: diff --git a/ql/src/test/results/clientpositive/spark/skewjoinopt7.q.out b/ql/src/test/results/clientpositive/spark/skewjoinopt7.q.out index e1d57d8..f16750e 100644 --- a/ql/src/test/results/clientpositive/spark/skewjoinopt7.q.out +++ b/ql/src/test/results/clientpositive/spark/skewjoinopt7.q.out @@ -77,19 +77,19 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) + predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: 0 {key} {val} - 1 {key} {val} - 2 {val} + 1 {val} + 2 {key} {val} keys: 0 key (type: string) 1 key (type: string) @@ -99,16 +99,16 @@ STAGE PLANS: Map 4 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: c + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) + predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: 0 {key} {val} - 1 {val} - 2 {key} {val} + 1 {key} {val} + 2 {val} keys: 0 key (type: string) 1 key (type: string) @@ -119,15 +119,15 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Union 3 <- Map 2 (NONE, 0), Map 7 (NONE, 0) + Union 2 <- Map 1 (NONE, 0), Map 7 (NONE, 0) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a Filter Operator - predicate: 
(key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) + predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) Map Join Operator condition map: Inner Join 0 to 1 @@ -142,19 +142,17 @@ STAGE PLANS: 2 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: - 1 Map 4 - 2 Map 1 + 1 Map 3 + 2 Map 4 Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work Map 7 @@ -162,7 +160,7 @@ STAGE PLANS: TableScan alias: a Filter Operator - predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) + predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) Map Join Operator condition map: Inner Join 0 to 1 @@ -182,18 +180,16 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work - Union 3 - Vertex: Union 3 + Union 2 + Vertex: Union 2 Stage: Stage-3 Spark @@ -205,7 +201,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) + predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: @@ -224,7 +220,7 @@ STAGE PLANS: alias: c Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) + predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: diff --git a/ql/src/test/results/clientpositive/spark/skewjoinopt8.q.out b/ql/src/test/results/clientpositive/spark/skewjoinopt8.q.out index f916350..5fd2665 100644 --- 
a/ql/src/test/results/clientpositive/spark/skewjoinopt8.q.out +++ b/ql/src/test/results/clientpositive/spark/skewjoinopt8.q.out @@ -75,19 +75,19 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (not ((key = '3') or (key = '8')))) (type: boolean) + predicate: (key is not null and ((key = '3') or (key = '8'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: 0 {key} {val} - 1 {key} {val} - 2 {val} + 1 {val} + 2 {key} {val} keys: 0 key (type: string) 1 key (type: string) @@ -97,16 +97,16 @@ STAGE PLANS: Map 4 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: c + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (not ((key = '3') or (key = '8')))) (type: boolean) + predicate: (key is not null and ((key = '3') or (key = '8'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: 0 {key} {val} - 1 {val} - 2 {key} {val} + 1 {key} {val} + 2 {val} keys: 0 key (type: string) 1 key (type: string) @@ -117,15 +117,15 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Union 3 <- Map 2 (NONE, 0), Map 7 (NONE, 0) + Union 2 <- Map 1 (NONE, 0), Map 7 (NONE, 0) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a Filter Operator - predicate: (key is not null and (not ((key = '3') or (key = '8')))) (type: boolean) + predicate: (key is not null and ((key = '3') or (key = '8'))) (type: boolean) Map Join Operator condition map: Inner Join 0 to 1 @@ -140,19 +140,17 @@ STAGE PLANS: 2 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: - 1 Map 4 - 2 Map 1 + 1 Map 3 + 2 Map 4 Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work Map 7 @@ -160,7 +158,7 @@ STAGE PLANS: TableScan alias: a Filter Operator - predicate: (key is not null and ((key = '3') or (key = '8'))) (type: boolean) + predicate: (key is not null and (not ((key = '3') or (key = '8')))) (type: boolean) Map Join Operator condition map: Inner Join 0 to 1 @@ -180,18 +178,16 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Select Operator 
- SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work - Union 3 - Vertex: Union 3 + Union 2 + Vertex: Union 2 Stage: Stage-3 Spark @@ -203,7 +199,7 @@ STAGE PLANS: alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and ((key = '3') or (key = '8'))) (type: boolean) + predicate: (key is not null and (not ((key = '3') or (key = '8')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: @@ -222,7 +218,7 @@ STAGE PLANS: alias: c Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and ((key = '3') or (key = '8'))) (type: boolean) + predicate: (key is not null and (not ((key = '3') or (key = '8')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: diff --git a/ql/src/test/results/clientpositive/spark/skewjoinopt9.q.out b/ql/src/test/results/clientpositive/spark/skewjoinopt9.q.out index 1019c2e..fbbe8fe 100644 --- a/ql/src/test/results/clientpositive/spark/skewjoinopt9.q.out +++ b/ql/src/test/results/clientpositive/spark/skewjoinopt9.q.out @@ -64,27 +64,13 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Union 4 <- Map 3 (NONE, 0), Map 5 (NONE, 0) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Union 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Union 2 (PARTITION-LEVEL SORT, 1) + Union 2 <- Map 1 (NONE, 0), Map 4 (NONE, 0) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 3 - Map Operator Tree: - TableScan alias: t1 Filter Operator predicate: key is not null (type: boolean) @@ -96,7 +82,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) value expressions: _col1 (type: string) - Map 5 + Map 4 Map Operator Tree: TableScan alias: t1 @@ -110,7 +96,21 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) value expressions: _col1 (type: string) - Reducer 2 + Map 5 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic 
stats: NONE Column stats: NONE + value expressions: val (type: string) + Reducer 3 Reduce Operator Tree: Join Operator condition map: @@ -131,8 +131,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 4 - Vertex: Union 4 + Union 2 + Vertex: Union 2 Stage: Stage-0 Fetch Operator @@ -236,22 +236,18 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Group By Operator + aggregations: count(1) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reducer 2 Local Work: Map Reduce Local Work @@ -262,34 +258,30 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {key} {val} + keys: + 0 _col0 (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} - 1 {key} {val} - keys: - 0 _col0 (type: string) - 1 key (type: string) + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - input vertices: - 1 Map 3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin9.q.out b/ql/src/test/results/clientpositive/spark/smb_mapjoin9.q.out 
index a856ec9..026f83e 100644 --- a/ql/src/test/results/clientpositive/spark/smb_mapjoin9.q.out +++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin9.q.out @@ -115,7 +115,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Local Work: Map Reduce Local Work @@ -123,7 +123,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Local Work: Map Reduce Local Work @@ -248,7 +248,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Local Work: Map Reduce Local Work @@ -256,7 +256,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Local Work: Map Reduce Local Work @@ -328,7 +328,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -350,7 +350,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -369,7 +369,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col6, _col7 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col6 (type: int), _col7 (type: string), '2010-10-15' (type: string), _col0 (type: int) diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_1.q.out b/ql/src/test/results/clientpositive/spark/smb_mapjoin_1.q.out index b9224ec..fbe3166 100644 --- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_1.q.out +++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_1.q.out @@ -66,7 +66,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -88,7 +88,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -107,7 +107,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 0 Map 2 + 0 Map 1 Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) @@ -155,7 +155,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -174,7 +174,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -190,7 +190,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) @@ -243,7 +243,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -262,7 +262,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -278,7 +278,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 0 Map 2 + 0 Map 1 Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) @@ -334,24 +334,24 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE 
Column stats: NONE + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Map 3 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Reducer 2 Reduce Operator Tree: @@ -416,7 +416,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -438,7 +438,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -457,7 +457,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 0 Map 2 + 0 Map 1 Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) @@ -505,7 +505,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -524,7 +524,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -540,7 +540,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) @@ -593,7 +593,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -612,7 +612,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -628,7 +628,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 0 Map 2 + 0 Map 1 Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) @@ -684,24 +684,24 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Map 3 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 1 Data 
size: 206 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Reducer 2 Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_10.q.out b/ql/src/test/results/clientpositive/spark/smb_mapjoin_10.q.out index 4db0008..cc511e4 100644 --- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_10.q.out +++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_10.q.out @@ -82,7 +82,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -104,7 +104,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -123,7 +123,7 @@ STAGE PLANS: 1 userid (type: int), pageid (type: int), postid (type: int), type (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col8, _col9, _col10, _col11 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 1 Data size: 151 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string), '1' (type: string), _col8 (type: int), _col9 (type: int), _col10 (type: int), _col11 (type: string), '2' (type: string) diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_13.q.out b/ql/src/test/results/clientpositive/spark/smb_mapjoin_13.q.out index 26dd59a..14cfa00 100644 --- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_13.q.out +++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_13.q.out @@ -130,7 +130,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: a @@ -207,10 +207,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (SORT, 1) + Reducer 3 <- Map 2 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -231,7 +231,7 @@ STAGE PLANS: 1 value (type: int) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 0 Map 3 + 0 Map 1 Position of Big Table: 1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -300,7 +300,7 @@ STAGE PLANS: name: default.test_table2 Truncated Path -> Alias: /test_table2 [b] - Reducer 2 + Reducer 3 Needs Tagging: false Reduce Operator Tree: Select Operator @@ -422,7 +422,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: b @@ -499,10 +499,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (SORT, 1) + Reducer 2 <- Map 1 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -523,7 +523,7 @@ STAGE PLANS: 1 UDFToDouble(value) (type: double) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 1 Map 1 + 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -592,7 +592,7 @@ STAGE PLANS: name: default.test_table3 Truncated Path -> Alias: /test_table3 [a] - Reducer 3 + Reducer 2 Needs Tagging: false Reduce Operator Tree: Select Operator diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_14.q.out 
b/ql/src/test/results/clientpositive/spark/smb_mapjoin_14.q.out index fc4d7f0..451ef9e 100644 --- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_14.q.out +++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_14.q.out @@ -104,17 +104,15 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reducer 2 @@ -233,22 +231,18 @@ STAGE PLANS: input vertices: 1 Map 4 Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Local Work: Map Reduce Local Work Reducer 2 @@ -258,25 +252,21 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE table: input format: 
org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -346,7 +336,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: b @@ -367,11 +357,11 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 3) - Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 3) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -390,27 +380,23 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0 input vertices: - 1 Map 1 + 1 Map 4 Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Local Work: Map Reduce Local Work - Reducer 3 + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -429,7 +415,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 4 + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -559,17 +545,15 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reducer 2 @@ -654,7 +638,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: a @@ -679,10 +663,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -700,22 +684,20 @@ STAGE PLANS: 0 _col0 (type: int) 1 key (type: int) input vertices: - 0 Map 3 + 0 Map 1 Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Group By Operator - 
aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work - Reducer 2 + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -821,7 +803,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: a @@ -846,10 +828,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -871,22 +853,20 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) input vertices: - 1 Map 1 + 1 Map 3 Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work - Reducer 3 + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1034,17 +1014,15 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reducer 2 @@ -1171,17 +1149,15 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data 
size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reducer 2 @@ -1300,17 +1276,15 @@ STAGE PLANS: input vertices: 0 Map 1 Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reducer 3 @@ -1425,17 +1399,15 @@ STAGE PLANS: input vertices: 0 Map 1 Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reducer 3 @@ -1512,7 +1484,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: a @@ -1562,10 +1534,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -1590,23 +1562,21 @@ STAGE PLANS: 1 _col0 (type: int) 2 _col0 (type: int) input vertices: - 1 Map 4 - 2 Map 1 + 1 Map 3 + 2 Map 4 Statistics: Num rows: 6 Data size: 46 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 6 Data size: 46 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work - Reducer 3 + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1696,7 +1666,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: a @@ -1721,10 +1691,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was 
here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -1742,22 +1712,20 @@ STAGE PLANS: 0 _col0 (type: int) 1 key (type: int) input vertices: - 0 Map 3 + 0 Map 1 Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work - Reducer 2 + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_15.q.out b/ql/src/test/results/clientpositive/spark/smb_mapjoin_15.q.out index ee07bc6..75191dd 100644 --- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_15.q.out +++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_15.q.out @@ -102,7 +102,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: a @@ -179,10 +179,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (SORT, 1) + Reducer 3 <- Map 2 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -203,7 +203,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 0 Map 3 + 0 Map 1 Position of Big Table: 1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -272,7 +272,7 @@ STAGE PLANS: name: default.test_table2 Truncated Path -> Alias: /test_table2 [b] - Reducer 2 + Reducer 3 Needs Tagging: false Reduce Operator Tree: Select Operator @@ -456,7 +456,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: a @@ -533,10 +533,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (SORT, 1) + Reducer 3 <- Map 2 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -557,7 +557,7 @@ STAGE PLANS: 1 key (type: int), key2 (type: int) outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col8 input vertices: - 0 Map 3 + 0 Map 1 Position of Big Table: 1 Statistics: Num rows: 137 Data size: 1984 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -626,7 +626,7 @@ STAGE PLANS: name: default.test_table2 Truncated Path -> Alias: /test_table2 [b] - Reducer 2 + Reducer 3 Needs Tagging: false Reduce Operator Tree: Select Operator @@ -758,7 +758,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: a @@ -835,10 +835,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (SORT, 1) + Reducer 3 <- Map 2 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -859,7 +859,7 @@ STAGE PLANS: 1 key2 (type: int), key (type: int) outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col8 input vertices: - 0 Map 3 + 0 Map 1 Position of Big 
Table: 1 Statistics: Num rows: 137 Data size: 1984 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -928,7 +928,7 @@ STAGE PLANS: name: default.test_table2 Truncated Path -> Alias: /test_table2 [b] - Reducer 2 + Reducer 3 Needs Tagging: false Reduce Operator Tree: Select Operator @@ -1060,7 +1060,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: a @@ -1137,10 +1137,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (SORT, 1) + Reducer 3 <- Map 2 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -1161,7 +1161,7 @@ STAGE PLANS: 1 key (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col8 input vertices: - 0 Map 3 + 0 Map 1 Position of Big Table: 1 Statistics: Num rows: 137 Data size: 1984 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -1230,7 +1230,7 @@ STAGE PLANS: name: default.test_table2 Truncated Path -> Alias: /test_table2 [b] - Reducer 2 + Reducer 3 Needs Tagging: false Reduce Operator Tree: Select Operator diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_16.q.out b/ql/src/test/results/clientpositive/spark/smb_mapjoin_16.q.out index 6b7a0ca..84ea5f2 100644 --- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_16.q.out +++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_16.q.out @@ -52,7 +52,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: a @@ -73,10 +73,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -94,22 +94,20 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) input vertices: - 0 Map 3 + 0 Map 1 Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work - Reducer 2 + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_17.q.out b/ql/src/test/results/clientpositive/spark/smb_mapjoin_17.q.out index 6c736af..f487f60 100644 --- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_17.q.out +++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_17.q.out @@ -190,10 +190,10 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan - alias: f + alias: b Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -217,10 +217,10 @@ STAGE PLANS: 6 key (type: int) Local Work: Map Reduce Local Work - Map 2 + Map 4 Map Operator Tree: TableScan - alias: g + alias: c 
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -244,7 +244,7 @@ STAGE PLANS: 6 key (type: int) Local Work: Map Reduce Local Work - Map 3 + Map 5 Map Operator Tree: TableScan alias: d @@ -271,7 +271,7 @@ STAGE PLANS: 6 key (type: int) Local Work: Map Reduce Local Work - Map 4 + Map 6 Map Operator Tree: TableScan alias: e @@ -298,10 +298,10 @@ STAGE PLANS: 6 key (type: int) Local Work: Map Reduce Local Work - Map 5 + Map 7 Map Operator Tree: TableScan - alias: b + alias: f Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -325,10 +325,10 @@ STAGE PLANS: 6 key (type: int) Local Work: Map Reduce Local Work - Map 6 + Map 8 Map Operator Tree: TableScan - alias: c + alias: g Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -356,10 +356,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 8 <- Map 7 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 7 + Map 1 Map Operator Tree: TableScan alias: a @@ -392,27 +392,25 @@ STAGE PLANS: 5 key (type: int) 6 key (type: int) input vertices: - 1 Map 5 - 2 Map 6 - 3 Map 3 - 4 Map 4 - 5 Map 1 - 6 Map 2 + 1 Map 3 + 2 Map 4 + 3 Map 5 + 4 Map 6 + 5 Map 7 + 6 Map 8 Statistics: Num rows: 33 Data size: 231 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 33 Data size: 231 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work - Reducer 8 + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -532,14 +530,13 @@ STAGE PLANS: 4 key (type: int) 5 key (type: int) 6 key (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator @@ -658,14 +655,13 @@ STAGE PLANS: 5 key (type: int) 6 key (type: int) 7 key (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator @@ -849,7 +845,7 @@ STAGE PLANS: Map 3 Map Operator Tree: TableScan - alias: t + alias: q Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) @@ -859,7 +855,7 @@ STAGE PLANS: Map 
4 Map Operator Tree: TableScan - alias: s + alias: r Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) @@ -869,7 +865,7 @@ STAGE PLANS: Map 5 Map Operator Tree: TableScan - alias: r + alias: s Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) @@ -879,7 +875,7 @@ STAGE PLANS: Map 6 Map Operator Tree: TableScan - alias: q + alias: t Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_2.q.out b/ql/src/test/results/clientpositive/spark/smb_mapjoin_2.q.out index bf8b6de..f7c6d74 100644 --- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_2.q.out +++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_2.q.out @@ -66,7 +66,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -88,7 +88,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -107,7 +107,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 0 Map 2 + 0 Map 1 Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) @@ -157,7 +157,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -176,7 +176,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -192,7 +192,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) @@ -245,7 +245,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -264,7 +264,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -280,7 +280,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 0 Map 2 + 0 Map 1 Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) @@ -338,24 +338,24 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Map 3 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE Reduce Output 
Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Reducer 2 Reduce Operator Tree: @@ -420,7 +420,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -442,7 +442,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -461,7 +461,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 0 Map 2 + 0 Map 1 Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) @@ -511,7 +511,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -530,7 +530,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -546,7 +546,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) @@ -599,7 +599,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -618,7 +618,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -634,7 +634,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 0 Map 2 + 0 Map 1 Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) @@ -692,24 +692,24 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Map 3 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Reducer 2 Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_25.q.out b/ql/src/test/results/clientpositive/spark/smb_mapjoin_25.q.out index f1768e3..d563c80 100644 --- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_25.q.out +++ 
b/ql/src/test/results/clientpositive/spark/smb_mapjoin_25.q.out @@ -63,7 +63,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 4 + Map 3 Map Operator Tree: TableScan alias: c @@ -85,7 +85,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -102,7 +102,7 @@ STAGE PLANS: 1 5 (type: int) Local Work: Map Reduce Local Work - Map 3 + Map 4 Map Operator Tree: TableScan alias: d @@ -120,7 +120,7 @@ STAGE PLANS: 0 5 (type: int) 1 5 (type: int) input vertices: - 0 Map 4 + 0 Map 3 Statistics: Num rows: 29 Data size: 118 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: 5 (type: int) @@ -140,7 +140,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -158,7 +158,7 @@ STAGE PLANS: 0 5 (type: int) 1 5 (type: int) input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 28 Data size: 114 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: 5 (type: int) @@ -175,7 +175,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col1 input vertices: - 1 Map 3 + 1 Map 4 Statistics: Num rows: 31 Data size: 129 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col1 = 5) (type: boolean) diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_3.q.out b/ql/src/test/results/clientpositive/spark/smb_mapjoin_3.q.out index 5fd0450..d54e749 100644 --- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_3.q.out +++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_3.q.out @@ -70,7 +70,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -92,7 +92,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -111,7 +111,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) @@ -161,7 +161,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -180,7 +180,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -196,7 +196,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) @@ -248,7 +248,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -267,7 +267,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -283,7 +283,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 0 Map 2 + 0 Map 1 Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) @@ -341,24 +341,24 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 2 Data size: 222 Basic 
stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Map 3 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Reducer 2 Reduce Operator Tree: @@ -422,7 +422,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -444,7 +444,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -463,7 +463,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) @@ -513,7 +513,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -532,7 +532,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -548,7 +548,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) @@ -600,7 +600,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -619,7 +619,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -635,7 +635,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 0 Map 2 + 0 Map 1 Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) @@ -693,24 +693,24 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Map 3 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: 
Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Reducer 2 Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_4.q.out b/ql/src/test/results/clientpositive/spark/smb_mapjoin_4.q.out index 3ce287f..a794a85 100644 --- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_4.q.out +++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_4.q.out @@ -66,19 +66,19 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {key} {value} + 0 {value} 1 {key} {value} - 2 {value} + 2 {key} {value} keys: 0 key (type: int) 1 key (type: int) @@ -88,16 +88,16 @@ STAGE PLANS: Map 3 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {value} + 0 {key} {value} 1 {key} {value} - 2 {key} {value} + 2 {value} keys: 0 key (type: int) 1 key (type: int) @@ -109,7 +109,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -131,8 +131,8 @@ STAGE PLANS: 2 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: - 0 Map 3 - 2 Map 2 + 0 Map 1 + 2 Map 3 Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) @@ -182,7 +182,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -198,7 +198,7 @@ STAGE PLANS: 2 key (type: int) Local Work: Map Reduce Local Work - Map 2 + Map 3 Map Operator Tree: TableScan alias: c @@ -219,7 +219,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: a @@ -238,8 +238,8 @@ STAGE PLANS: 2 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: - 1 Map 1 - 2 Map 2 + 1 Map 2 + 2 Map 3 Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) @@ -289,7 +289,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator 
Tree: TableScan alias: b @@ -305,7 +305,7 @@ STAGE PLANS: 2 key (type: int) Local Work: Map Reduce Local Work - Map 2 + Map 3 Map Operator Tree: TableScan alias: c @@ -326,7 +326,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: a @@ -345,8 +345,8 @@ STAGE PLANS: 2 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: - 1 Map 1 - 2 Map 2 + 1 Map 2 + 2 Map 3 Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) @@ -404,12 +404,12 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {key} {value} - 1 {value} + 0 {value} + 1 {key} {value} 2 {key} {value} keys: 0 key (type: int) @@ -417,15 +417,15 @@ STAGE PLANS: 2 key (type: int) Local Work: Map Reduce Local Work - Map 3 + Map 2 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {value} - 1 {key} {value} + 0 {key} {value} + 1 {value} 2 {key} {value} keys: 0 key (type: int) @@ -438,7 +438,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: c @@ -457,8 +457,8 @@ STAGE PLANS: 2 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: - 0 Map 3 - 1 Map 1 + 0 Map 1 + 1 Map 2 Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) @@ -518,35 +518,35 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Map 3 Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Map 4 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition 
columns: key (type: int) - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Reducer 2 Reduce Operator Tree: @@ -620,12 +620,12 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {key} {value} - 1 {value} + 0 {value} + 1 {key} {value} 2 {key} {value} keys: 0 key (type: int) @@ -633,15 +633,15 @@ STAGE PLANS: 2 key (type: int) Local Work: Map Reduce Local Work - Map 3 + Map 2 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {value} - 1 {key} {value} + 0 {key} {value} + 1 {value} 2 {key} {value} keys: 0 key (type: int) @@ -654,7 +654,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: c @@ -673,8 +673,8 @@ STAGE PLANS: 2 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: - 0 Map 3 - 1 Map 1 + 0 Map 1 + 1 Map 2 Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) @@ -726,16 +726,16 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {key} {value} + 0 {value} 1 {key} {value} - 2 {value} + 2 {key} {value} keys: 0 key (type: int) 1 key (type: int) @@ -745,13 +745,13 @@ STAGE PLANS: Map 3 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {value} + 0 {key} {value} 1 {key} {value} - 2 {key} {value} + 2 {value} keys: 0 key (type: int) 1 key (type: int) @@ -763,7 +763,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -782,8 +782,8 @@ STAGE PLANS: 2 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: - 0 Map 3 - 2 Map 2 + 0 Map 1 + 2 Map 3 Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) @@ -840,12 +840,12 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {key} {value} - 1 {value} + 0 {value} + 1 {key} {value} 2 {key} {value} keys: 0 key (type: int) @@ 
-853,15 +853,15 @@ STAGE PLANS: 2 key (type: int) Local Work: Map Reduce Local Work - Map 3 + Map 2 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {value} - 1 {key} {value} + 0 {key} {value} + 1 {value} 2 {key} {value} keys: 0 key (type: int) @@ -874,7 +874,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: c @@ -893,8 +893,8 @@ STAGE PLANS: 2 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: - 0 Map 3 - 1 Map 1 + 0 Map 1 + 1 Map 2 Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) @@ -954,35 +954,35 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Map 3 Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Map 4 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Reducer 2 Reduce Operator Tree: @@ -1054,35 +1054,35 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Map 3 Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) 
sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Map 4 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Reducer 2 Reduce Operator Tree: @@ -1148,35 +1148,35 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Map 3 Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Map 4 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Reducer 2 Reduce Operator Tree: @@ -1249,35 +1249,35 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Map 3 Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 
Data size: 206 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Map 4 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Reducer 2 Reduce Operator Tree: @@ -1347,35 +1347,35 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Map 3 Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Map 4 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Reducer 2 Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_5.q.out b/ql/src/test/results/clientpositive/spark/smb_mapjoin_5.q.out index 82c42e4..10506e8 100644 --- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_5.q.out +++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_5.q.out @@ -66,19 +66,19 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {key} {value} + 0 {value} 1 {key} {value} - 2 {value} + 2 {key} {value} keys: 0 key (type: int) 1 key (type: int) @@ -88,16 +88,16 @@ STAGE PLANS: Map 3 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + alias: c + 
Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {value} + 0 {key} {value} 1 {key} {value} - 2 {key} {value} + 2 {value} keys: 0 key (type: int) 1 key (type: int) @@ -109,7 +109,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -131,8 +131,8 @@ STAGE PLANS: 2 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: - 0 Map 3 - 2 Map 2 + 0 Map 1 + 2 Map 3 Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) @@ -182,7 +182,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -198,7 +198,7 @@ STAGE PLANS: 2 key (type: int) Local Work: Map Reduce Local Work - Map 2 + Map 3 Map Operator Tree: TableScan alias: c @@ -219,7 +219,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: a @@ -238,8 +238,8 @@ STAGE PLANS: 2 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: - 1 Map 1 - 2 Map 2 + 1 Map 2 + 2 Map 3 Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) @@ -289,7 +289,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -305,7 +305,7 @@ STAGE PLANS: 2 key (type: int) Local Work: Map Reduce Local Work - Map 2 + Map 3 Map Operator Tree: TableScan alias: c @@ -326,7 +326,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: a @@ -345,8 +345,8 @@ STAGE PLANS: 2 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: - 1 Map 1 - 2 Map 2 + 1 Map 2 + 2 Map 3 Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) @@ -404,12 +404,12 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {key} {value} - 1 {value} + 0 {value} + 1 {key} {value} 2 {key} {value} keys: 0 key (type: int) @@ -417,15 +417,15 @@ STAGE PLANS: 2 key (type: int) Local Work: Map Reduce Local Work - Map 3 + Map 2 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {value} - 1 {key} {value} + 0 {key} {value} + 1 {value} 2 {key} {value} keys: 0 key (type: int) @@ -438,7 +438,7 @@ 
STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: c @@ -457,8 +457,8 @@ STAGE PLANS: 2 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: - 0 Map 3 - 1 Map 1 + 0 Map 1 + 1 Map 2 Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) @@ -518,35 +518,35 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Map 3 Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Map 4 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Reducer 2 Reduce Operator Tree: @@ -620,12 +620,12 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {key} {value} - 1 {value} + 0 {value} + 1 {key} {value} 2 {key} {value} keys: 0 key (type: int) @@ -633,15 +633,15 @@ STAGE PLANS: 2 key (type: int) Local Work: Map Reduce Local Work - Map 3 + Map 2 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {value} - 1 {key} {value} + 0 {key} {value} + 1 {value} 2 {key} {value} keys: 0 key (type: int) @@ -654,7 +654,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: c @@ -673,8 +673,8 @@ STAGE PLANS: 2 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: - 0 Map 3 - 1 Map 1 + 0 Map 1 + 1 Map 2 Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 
(type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) @@ -726,16 +726,16 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {key} {value} + 0 {value} 1 {key} {value} - 2 {value} + 2 {key} {value} keys: 0 key (type: int) 1 key (type: int) @@ -745,13 +745,13 @@ STAGE PLANS: Map 3 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {value} + 0 {key} {value} 1 {key} {value} - 2 {key} {value} + 2 {value} keys: 0 key (type: int) 1 key (type: int) @@ -763,7 +763,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -782,8 +782,8 @@ STAGE PLANS: 2 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: - 0 Map 3 - 2 Map 2 + 0 Map 1 + 2 Map 3 Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) @@ -840,12 +840,12 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {key} {value} - 1 {value} + 0 {value} + 1 {key} {value} 2 {key} {value} keys: 0 key (type: int) @@ -853,15 +853,15 @@ STAGE PLANS: 2 key (type: int) Local Work: Map Reduce Local Work - Map 3 + Map 2 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {value} - 1 {key} {value} + 0 {key} {value} + 1 {value} 2 {key} {value} keys: 0 key (type: int) @@ -874,7 +874,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: c @@ -893,8 +893,8 @@ STAGE PLANS: 2 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: - 0 Map 3 - 1 Map 1 + 0 Map 1 + 1 Map 2 Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) @@ -954,35 +954,35 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Map 3 Map Operator 
Tree: TableScan - alias: c - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Map 4 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Reducer 2 Reduce Operator Tree: @@ -1054,35 +1054,35 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Map 3 Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Map 4 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Reducer 2 Reduce Operator Tree: @@ -1148,35 +1148,35 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Map 3 Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 1 Data size: 206 Basic 
stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Map 4 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Reducer 2 Reduce Operator Tree: @@ -1249,35 +1249,35 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Map 3 Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Map 4 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Reducer 2 Reduce Operator Tree: @@ -1347,35 +1347,35 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Map 3 Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num 
rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Map 4 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Reducer 2 Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_6.q.out b/ql/src/test/results/clientpositive/spark/smb_mapjoin_6.q.out index 6d5c9ac..07659a4 100644 --- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_6.q.out +++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_6.q.out @@ -73,7 +73,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -95,7 +95,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -114,7 +114,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 275 Data size: 2646 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) @@ -1247,7 +1247,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -1269,7 +1269,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -1288,7 +1288,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 275 Data size: 2646 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) @@ -2437,7 +2437,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -2459,7 +2459,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -2478,7 +2478,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 182 Data size: 1756 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) @@ -2543,7 +2543,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -2565,7 +2565,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -2584,7 +2584,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 182 Data size: 1756 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) @@ -2646,7 +2646,7 @@ STAGE PLANS: Spark #### A masked pattern 
was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -2665,7 +2665,7 @@ STAGE PLANS: 2 key (type: int) Local Work: Map Reduce Local Work - Map 2 + Map 3 Map Operator Tree: TableScan alias: c @@ -2689,7 +2689,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: a @@ -2711,8 +2711,8 @@ STAGE PLANS: 2 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: - 1 Map 1 - 2 Map 2 + 1 Map 2 + 2 Map 3 Statistics: Num rows: 365 Data size: 3513 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_7.q.out b/ql/src/test/results/clientpositive/spark/smb_mapjoin_7.q.out index b040a6a..c48ed6d 100644 --- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_7.q.out +++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_7.q.out @@ -629,24 +629,24 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: value (type: string) Map 3 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Reducer 2 Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/spark/sort_merge_join_desc_1.q.out b/ql/src/test/results/clientpositive/spark/sort_merge_join_desc_1.q.out index d6054fc..21232c9 100644 --- a/ql/src/test/results/clientpositive/spark/sort_merge_join_desc_1.q.out +++ b/ql/src/test/results/clientpositive/spark/sort_merge_join_desc_1.q.out @@ -64,7 +64,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: b @@ -85,10 +85,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -106,22 +106,20 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) input vertices: - 1 Map 1 + 1 Map 3 Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work - Reducer 3 + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/spark/sort_merge_join_desc_2.q.out b/ql/src/test/results/clientpositive/spark/sort_merge_join_desc_2.q.out index 18525d0..3517be5 100644 --- a/ql/src/test/results/clientpositive/spark/sort_merge_join_desc_2.q.out +++ b/ql/src/test/results/clientpositive/spark/sort_merge_join_desc_2.q.out @@ -72,7 +72,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: b @@ -93,10 +93,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -114,22 +114,20 @@ STAGE PLANS: 0 key (type: string), value (type: string) 1 key (type: string), value (type: string) input vertices: - 1 Map 1 + 1 Map 3 Statistics: Num rows: 45 Data size: 478 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 45 Data size: 478 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work - Reducer 3 + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/spark/sort_merge_join_desc_3.q.out b/ql/src/test/results/clientpositive/spark/sort_merge_join_desc_3.q.out index 0871e8d..2e5f240 100644 --- a/ql/src/test/results/clientpositive/spark/sort_merge_join_desc_3.q.out +++ b/ql/src/test/results/clientpositive/spark/sort_merge_join_desc_3.q.out @@ -72,7 +72,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: b @@ -93,10 +93,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -114,22 +114,20 @@ STAGE PLANS: 0 key (type: string), value (type: string) 1 key (type: string), value (type: string) input vertices: - 1 Map 1 + 1 Map 3 Statistics: Num rows: 45 Data size: 478 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 45 Data size: 478 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num 
rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work - Reducer 3 + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/spark/sort_merge_join_desc_4.q.out b/ql/src/test/results/clientpositive/spark/sort_merge_join_desc_4.q.out index 548defa..aa4cfd6 100644 --- a/ql/src/test/results/clientpositive/spark/sort_merge_join_desc_4.q.out +++ b/ql/src/test/results/clientpositive/spark/sort_merge_join_desc_4.q.out @@ -70,7 +70,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: b @@ -91,10 +91,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -112,22 +112,20 @@ STAGE PLANS: 0 key (type: string), value (type: string) 1 key (type: string), value (type: string) input vertices: - 1 Map 1 + 1 Map 3 Statistics: Num rows: 45 Data size: 478 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 45 Data size: 478 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work - Reducer 3 + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/spark/sort_merge_join_desc_5.q.out b/ql/src/test/results/clientpositive/spark/sort_merge_join_desc_5.q.out index dbfc496..fa2f05e 100644 --- a/ql/src/test/results/clientpositive/spark/sort_merge_join_desc_5.q.out +++ b/ql/src/test/results/clientpositive/spark/sort_merge_join_desc_5.q.out @@ -121,7 +121,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: b @@ -198,10 +198,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -221,22 +221,20 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) input vertices: - 1 Map 1 + 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: 
_col0 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -292,7 +290,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part_1 Truncated Path -> Alias: /srcbucket_mapjoin_part_1/part=1 [a] - Reducer 3 + Reducer 2 Needs Tagging: false Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/spark/sort_merge_join_desc_6.q.out b/ql/src/test/results/clientpositive/spark/sort_merge_join_desc_6.q.out index dc0aef8..bd1b59f 100644 --- a/ql/src/test/results/clientpositive/spark/sort_merge_join_desc_6.q.out +++ b/ql/src/test/results/clientpositive/spark/sort_merge_join_desc_6.q.out @@ -121,7 +121,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: b @@ -197,10 +197,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -220,22 +220,20 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) input vertices: - 1 Map 1 + 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -291,7 +289,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part_1 Truncated Path -> Alias: /srcbucket_mapjoin_part_1/part=1 [a] - Reducer 3 + Reducer 2 Needs Tagging: false Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/spark/sort_merge_join_desc_7.q.out b/ql/src/test/results/clientpositive/spark/sort_merge_join_desc_7.q.out index e09d413..bacdb48 100644 --- a/ql/src/test/results/clientpositive/spark/sort_merge_join_desc_7.q.out +++ b/ql/src/test/results/clientpositive/spark/sort_merge_join_desc_7.q.out @@ -157,7 +157,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: b @@ -281,10 +281,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -304,22 +304,20 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) input vertices: - 1 Map 1 + 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 
Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -422,7 +420,7 @@ STAGE PLANS: Truncated Path -> Alias: /srcbucket_mapjoin_part_1/part=1 [a] /srcbucket_mapjoin_part_1/part=2 [a] - Reducer 3 + Reducer 2 Needs Tagging: false Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/spark/sort_merge_join_desc_8.q.out b/ql/src/test/results/clientpositive/spark/sort_merge_join_desc_8.q.out index 5de9dc9..81e4f59 100644 --- a/ql/src/test/results/clientpositive/spark/sort_merge_join_desc_8.q.out +++ b/ql/src/test/results/clientpositive/spark/sort_merge_join_desc_8.q.out @@ -117,7 +117,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: b @@ -138,10 +138,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -159,22 +159,20 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) input vertices: - 1 Map 1 + 1 Map 3 Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work - Reducer 3 + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -234,7 +232,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: b @@ -255,10 +253,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -276,22 +274,20 @@ STAGE PLANS: 0 key (type: string), value2 (type: string) 1 key (type: string), value2 (type: string) input vertices: - 1 Map 1 + 1 Map 3 Statistics: Num rows: 45 Data size: 920 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 45 Data size: 920 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 
Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work - Reducer 3 + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/spark/stats1.q.out b/ql/src/test/results/clientpositive/spark/stats1.q.out index 3ba6989..ba22d9a 100644 --- a/ql/src/test/results/clientpositive/spark/stats1.q.out +++ b/ql/src/test/results/clientpositive/spark/stats1.q.out @@ -56,16 +56,13 @@ STAGE PLANS: Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.tmptable + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tmptable Reducer 2 Reduce Operator Tree: Group By Operator @@ -75,16 +72,13 @@ STAGE PLANS: Select Operator expressions: 'tst1' (type: string), UDFToString(_col0) (type: string) outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.tmptable + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tmptable Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/spark/subquery_in.q.out b/ql/src/test/results/clientpositive/spark/subquery_in.q.out index 552b5ae..eac98b8 100644 --- a/ql/src/test/results/clientpositive/spark/subquery_in.q.out +++ b/ql/src/test/results/clientpositive/spark/subquery_in.q.out @@ -24,7 +24,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: s1 @@ -55,7 +55,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src @@ -74,7 +74,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string) @@ -281,11 +281,11 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 3) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: part @@ -296,7 +296,7 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: 
p_mfgr (type: string), p_size (type: int) - Reducer 2 + Reducer 3 Reduce Operator Tree: Extract Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE @@ -318,7 +318,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: struct) - Reducer 3 + Reducer 4 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -330,28 +330,24 @@ STAGE PLANS: Filter Operator predicate: _col0 is not null (type: boolean) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: double) + Group By Operator + keys: _col0 (type: double) + mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: double) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {p_name} {p_size} - 1 - keys: - 0 UDFToDouble(p_size) (type: double) - 1 _col0 (type: double) + Spark HashTable Sink Operator + condition expressions: + 0 {p_name} {p_size} + 1 + keys: + 0 UDFToDouble(p_size) (type: double) + 1 _col0 (type: double) Stage: Stage-1 Spark #### A masked pattern was here #### Vertices: - Map 4 + Map 1 Map Operator Tree: TableScan alias: part @@ -370,7 +366,7 @@ STAGE PLANS: 1 _col0 (type: double) outputColumnNames: _col1, _col5 input vertices: - 1 Reducer 3 + 1 Reducer 4 Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string), _col5 (type: int) @@ -620,20 +616,16 @@ STAGE PLANS: Filter Operator predicate: ((key > '9') and value is not null) (type: boolean) Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Group By Operator + keys: key (type: string), value (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: key (type: string), value (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Reducer 3 Local Work: Map Reduce Local Work @@ -643,22 +635,18 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - 
condition expressions: - 0 {key} {value} - 1 - keys: - 0 key (type: string), value (type: string) - 1 _col0 (type: string), _col1 (type: string) + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 + keys: + 0 key (type: string), value (type: string) + 1 _col0 (type: string), _col1 (type: string) Stage: Stage-1 Spark @@ -782,7 +770,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: li @@ -829,10 +817,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 3) + Reducer 2 <- Map 1 (GROUP, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: lineitem @@ -840,21 +828,17 @@ STAGE PLANS: Filter Operator predicate: l_partkey is not null (type: boolean) Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: l_partkey (type: int) - outputColumnNames: l_partkey + Group By Operator + keys: l_partkey (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: l_partkey (type: int) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - Reducer 3 + Reducer 2 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -862,48 +846,44 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 {l_orderkey} {l_suppkey} + keys: + 0 _col0 (type: int) + 1 l_partkey (type: int) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 1 Map 3 + Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 condition expressions: - 0 {_col0} - 1 {l_orderkey} {l_suppkey} + 0 {_col0} {_col3} + 1 keys: - 0 _col0 (type: int) - 1 l_partkey (type: int) - outputColumnNames: _col0, _col1, _col3 + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col3 input vertices: - 1 Map 1 - Statistics: Num rows: 27 Data size: 3298 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - condition expressions: - 0 {_col0} {_col3} - 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col3 - input vertices: - 1 Map 4 - Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col3 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 29 Data size: 3627 
Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 1 Map 4 + Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col3 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out b/ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out index 8e05bb3..1049fd0 100644 --- a/ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out +++ b/ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out @@ -232,18 +232,14 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_4 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_4 Local Work: Map Reduce Local Work Reducer 2 @@ -675,18 +671,14 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_4 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_4 Local Work: Map Reduce Local Work Reducer 2 diff --git a/ql/src/test/results/clientpositive/spark/table_access_keys_stats.q.out b/ql/src/test/results/clientpositive/spark/table_access_keys_stats.q.out index fbe392e..17c57ac 100644 --- a/ql/src/test/results/clientpositive/spark/table_access_keys_stats.q.out +++ b/ql/src/test/results/clientpositive/spark/table_access_keys_stats.q.out @@ 
-199,11 +199,11 @@ PREHOOK: Input: default@t1 #### A masked pattern was here #### Operator:GBY_2 Table:default@t1 -Keys:key,val +Keys:key Operator:GBY_8 Table:default@t1 -Keys:key +Keys:key,val 1 1 1 11 1 2 1 2 12 1 diff --git a/ql/src/test/results/clientpositive/spark/temp_table.q.out b/ql/src/test/results/clientpositive/spark/temp_table.q.out index 39610d5..feb7711 100644 --- a/ql/src/test/results/clientpositive/spark/temp_table.q.out +++ b/ql/src/test/results/clientpositive/spark/temp_table.q.out @@ -207,31 +207,25 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: bar + alias: foo Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - value expressions: _col1 (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + value expressions: _col1 (type: string) Map 4 Map Operator Tree: TableScan - alias: foo + alias: bar Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - value expressions: _col1 (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + value expressions: _col1 (type: string) Reducer 3 Reduce Operator Tree: Select Operator diff --git a/ql/src/test/results/clientpositive/spark/temp_table_join1.q.out b/ql/src/test/results/clientpositive/spark/temp_table_join1.q.out index e1cc9e3..b4277d6 100644 --- a/ql/src/test/results/clientpositive/spark/temp_table_join1.q.out +++ b/ql/src/test/results/clientpositive/spark/temp_table_join1.q.out @@ -42,7 +42,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: src2 @@ -64,7 +64,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src1 @@ -83,7 +83,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col6 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col6 (type: string) @@ -145,7 +145,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: src2 @@ -167,7 +167,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src1 @@ -186,7 +186,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col6 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col6 (type: string) @@ -250,7 +250,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: src2 @@ -272,7 +272,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src1 @@ -291,7 +291,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col6 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE Select Operator 
expressions: _col0 (type: string), _col6 (type: string) diff --git a/ql/src/test/results/clientpositive/spark/tez_join_tests.q.out b/ql/src/test/results/clientpositive/spark/tez_join_tests.q.out index 50dafde..15a6751 100644 --- a/ql/src/test/results/clientpositive/spark/tez_join_tests.q.out +++ b/ql/src/test/results/clientpositive/spark/tez_join_tests.q.out @@ -19,7 +19,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: b @@ -37,10 +37,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 5 <- Map 4 (SORT, 1) + Reducer 2 <- Map 1 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 4 + Map 1 Map Operator Tree: TableScan alias: a @@ -56,7 +56,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col5, _col6 input vertices: - 1 Map 1 + 1 Map 3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col5 (type: string), _col6 (type: string) @@ -69,7 +69,7 @@ STAGE PLANS: value expressions: _col1 (type: string) Local Work: Map Reduce Local Work - Reducer 5 + Reducer 2 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -88,10 +88,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (SORT, 1) + Reducer 5 <- Map 4 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 4 Map Operator Tree: TableScan alias: c @@ -107,20 +107,16 @@ STAGE PLANS: 1 value (type: string) outputColumnNames: _col0, _col1, _col2, _col3 input vertices: - 0 Reducer 5 + 0 Reducer 2 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) Local Work: Map Reduce Local Work - Reducer 3 + Reducer 5 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string) diff --git a/ql/src/test/results/clientpositive/spark/tez_joins_explain.q.out b/ql/src/test/results/clientpositive/spark/tez_joins_explain.q.out index 6ee2d76..2e28b02 100644 --- a/ql/src/test/results/clientpositive/spark/tez_joins_explain.q.out +++ b/ql/src/test/results/clientpositive/spark/tez_joins_explain.q.out @@ -19,7 +19,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: b @@ -37,10 +37,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 5 <- Map 4 (SORT, 1) + Reducer 2 <- Map 1 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 4 + Map 1 Map Operator Tree: TableScan alias: a @@ -56,7 +56,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col5, _col6 input vertices: - 1 Map 1 + 1 Map 3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col5 (type: string), _col6 (type: string) @@ -69,7 +69,7 @@ STAGE PLANS: value expressions: _col1 (type: string) 
Local Work: Map Reduce Local Work - Reducer 5 + Reducer 2 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -88,10 +88,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (SORT, 1) + Reducer 5 <- Map 4 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 4 Map Operator Tree: TableScan alias: c @@ -107,20 +107,16 @@ STAGE PLANS: 1 value (type: string) outputColumnNames: _col0, _col1, _col2, _col3 input vertices: - 0 Reducer 5 + 0 Reducer 2 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) Local Work: Map Reduce Local Work - Reducer 3 + Reducer 5 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string) diff --git a/ql/src/test/results/clientpositive/spark/union18.q.out b/ql/src/test/results/clientpositive/spark/union18.q.out index 3091362..d2bcd70 100644 --- a/ql/src/test/results/clientpositive/spark/union18.q.out +++ b/ql/src/test/results/clientpositive/spark/union18.q.out @@ -70,16 +70,13 @@ STAGE PLANS: Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 Select Operator expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 @@ -99,16 +96,13 @@ STAGE PLANS: Select Operator expressions: 'tst1' (type: string), UDFToString(_col0) (type: string) outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 Select Operator expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string) 
outputColumnNames: _col0, _col1, _col2 diff --git a/ql/src/test/results/clientpositive/spark/union19.q.out b/ql/src/test/results/clientpositive/spark/union19.q.out index cd78a89..13fb395 100644 --- a/ql/src/test/results/clientpositive/spark/union19.q.out +++ b/ql/src/test/results/clientpositive/spark/union19.q.out @@ -71,19 +71,16 @@ STAGE PLANS: Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: string) + Group By Operator + aggregations: count(_col1) + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0, _col1 - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - value expressions: _col1 (type: bigint) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: bigint) Select Operator expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 @@ -103,19 +100,16 @@ STAGE PLANS: Select Operator expressions: 'tst1' (type: string), UDFToString(_col0) (type: string) outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: string) + Group By Operator + aggregations: count(_col1) + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0, _col1 - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - value expressions: _col1 (type: bigint) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: bigint) Select Operator expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 @@ -134,18 +128,14 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/spark/union23.q.out b/ql/src/test/results/clientpositive/spark/union23.q.out index dab602d..606153a 100644 --- a/ql/src/test/results/clientpositive/spark/union23.q.out +++ b/ql/src/test/results/clientpositive/spark/union23.q.out @@ -35,19 +35,6 @@ STAGE PLANS: Select Operator 
expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map 4 - Map Operator Tree: - TableScan - alias: src - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 Transform Operator command: cat output info: @@ -60,6 +47,19 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ + Map 4 + Map Operator Tree: + TableScan + alias: src + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ Reducer 3 Reduce Operator Tree: Select Operator diff --git a/ql/src/test/results/clientpositive/spark/union25.q.out b/ql/src/test/results/clientpositive/spark/union25.q.out index b64ec53..c439b1a 100644 --- a/ql/src/test/results/clientpositive/spark/union25.q.out +++ b/ql/src/test/results/clientpositive/spark/union25.q.out @@ -66,10 +66,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Union 2 <- Map 1 (NONE, 0), Map 6 (NONE, 0) - Union 4 <- Map 7 (NONE, 0), Reducer 3 (NONE, 0) + Union 5 <- Map 4 (NONE, 0), Map 7 (NONE, 0) + Reducer 6 <- Union 5 (GROUP, 3) + Union 2 <- Map 1 (NONE, 0), Reducer 6 (NONE, 0) Reducer 3 <- Union 2 (GROUP, 3) - Reducer 5 <- Union 4 (GROUP, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -77,6 +77,23 @@ STAGE PLANS: TableScan alias: a Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + value expressions: _col2 (type: bigint) + Map 4 + Map Operator Tree: + TableScan + alias: a + Select Operator expressions: key (type: string) outputColumnNames: _col0 Select Operator @@ -90,7 +107,7 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Map 6 + Map 7 Map Operator Tree: TableScan alias: b @@ -108,51 +125,9 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Map 7 - Map Operator Tree: - TableScan - alias: a - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Group By Operator - aggregations: count(1) - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - value expressions: _col2 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Select Operator - expressions: _col0 
(type: string), _col0 (type: string) - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Group By Operator - aggregations: count(1) - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - value expressions: _col2 (type: bigint) - Reducer 5 - Reduce Operator Tree: - Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial @@ -170,10 +145,29 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmp_unionall + Reducer 6 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: string), _col0 (type: string) + outputColumnNames: _col0, _col1 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + value expressions: _col2 (type: bigint) Union 2 Vertex: Union 2 - Union 4 - Vertex: Union 4 + Union 5 + Vertex: Union 5 Stage: Stage-0 Move Operator diff --git a/ql/src/test/results/clientpositive/spark/union3.q.out b/ql/src/test/results/clientpositive/spark/union3.q.out index f835d8c..8240654 100644 --- a/ql/src/test/results/clientpositive/spark/union3.q.out +++ b/ql/src/test/results/clientpositive/spark/union3.q.out @@ -46,13 +46,13 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 10 <- Map 9 (GROUP, 1) - Reducer 11 <- Reducer 10 (SORT, 1) + Reducer 11 <- Map 10 (GROUP, 1) Reducer 2 <- Map 1 (GROUP, 1) - Reducer 5 <- Map 4 (GROUP, 1) - Reducer 7 <- Map 6 (GROUP, 1) - Reducer 8 <- Reducer 7 (SORT, 1) - Union 3 <- Reducer 11 (NONE, 0), Reducer 2 (NONE, 0), Reducer 5 (NONE, 0), Reducer 8 (NONE, 0) + Reducer 6 <- Map 5 (GROUP, 1) + Reducer 9 <- Map 8 (GROUP, 1) + Reducer 3 <- Reducer 2 (SORT, 1) + Reducer 7 <- Reducer 6 (SORT, 1) + Union 4 <- Reducer 11 (NONE, 0), Reducer 3 (NONE, 0), Reducer 7 (NONE, 0), Reducer 9 (NONE, 0) #### A masked pattern was here #### Vertices: Map 1 @@ -68,7 +68,7 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Map 4 + Map 10 Map Operator Tree: TableScan alias: src @@ -81,7 +81,7 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Map 6 + Map 5 Map Operator Tree: TableScan alias: src @@ -94,7 +94,7 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Map 9 + Map 8 Map Operator Tree: TableScan alias: src @@ -107,23 +107,37 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Reducer 10 + Reducer 11 Reduce Operator Tree: - Select Operator - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Limit - Number of rows: 1 - Statistics: Num rows: 1 Data 
size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Limit + Number of rows: 1 + Select Operator + expressions: 4 (type: int) + outputColumnNames: _col0 Select Operator - expressions: 2 (type: int) + expressions: _col0 (type: int) outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 2 + Reduce Operator Tree: + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator + expressions: 1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Reducer 11 + Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -137,57 +151,21 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 2 - Reduce Operator Tree: - Select Operator - Limit - Number of rows: 1 - Select Operator - expressions: 4 (type: int) - outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 + Reducer 6 Reduce Operator Tree: - Select Operator - Limit - Number of rows: 1 - Select Operator - expressions: 3 (type: int) - outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 - Reduce Operator Tree: - Select Operator + Limit + Number of rows: 1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Limit - Number of rows: 1 + Select Operator + expressions: 2 (type: int) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Reducer 8 + Reducer 7 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -201,8 +179,24 @@ STAGE PLANS: input format: 
org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 3 - Vertex: Union 3 + Reducer 9 + Reduce Operator Tree: + Limit + Number of rows: 1 + Select Operator + expressions: 3 (type: int) + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 4 + Vertex: Union 4 Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/union30.q.out b/ql/src/test/results/clientpositive/spark/union30.q.out index 79741bd..c4eeb8d 100644 --- a/ql/src/test/results/clientpositive/spark/union30.q.out +++ b/ql/src/test/results/clientpositive/spark/union30.q.out @@ -55,15 +55,32 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 5 <- Map 4 (GROUP, 1) - Union 3 <- Map 6 (NONE, 0), Map 7 (NONE, 0), Reducer 2 (NONE, 0), Reducer 5 (NONE, 0) + Reducer 4 <- Map 3 (GROUP, 1) + Reducer 6 <- Map 5 (GROUP, 1) + Union 2 <- Map 1 (NONE, 0), Map 7 (NONE, 0), Reducer 4 (NONE, 0), Reducer 6 (NONE, 0) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: src + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.union_subq_union + Map 3 + Map Operator Tree: + TableScan + alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -81,7 +98,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) - Map 4 + Map 5 Map Operator Tree: TableScan alias: src @@ -102,23 +119,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) - Map 6 - Map Operator Tree: - TableScan - alias: src - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.union_subq_union Map 7 Map Operator Tree: TableScan @@ -136,7 +136,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union - Reducer 2 + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: 
count(VALUE._col0) @@ -159,7 +159,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union - Reducer 5 + Reducer 6 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -182,8 +182,8 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union - Union 3 - Vertex: Union 3 + Union 2 + Vertex: Union 2 Stage: Stage-0 Move Operator diff --git a/ql/src/test/results/clientpositive/spark/union33.q.out b/ql/src/test/results/clientpositive/spark/union33.q.out index 1fbc5b5..b5a3f99 100644 --- a/ql/src/test/results/clientpositive/spark/union33.q.out +++ b/ql/src/test/results/clientpositive/spark/union33.q.out @@ -55,16 +55,13 @@ STAGE PLANS: Select Operator expressions: '0' (type: string), value (type: string) outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_src + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_src Map 3 Map Operator Tree: TableScan @@ -110,16 +107,13 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), UDFToString(_col1) (type: string) outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_src + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_src Union 2 Vertex: Union 2 @@ -231,16 +225,13 @@ STAGE PLANS: Select Operator expressions: '0' (type: string), value (type: string) outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_src + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_src Reducer 2 Reduce Operator Tree: Group By Operator @@ -265,16 +256,13 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), UDFToString(_col1) (type: string) outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: 
string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_src + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_src Union 4 Vertex: Union 4 diff --git a/ql/src/test/results/clientpositive/spark/union6.q.out b/ql/src/test/results/clientpositive/spark/union6.q.out index 2691b65..eb1c751 100644 --- a/ql/src/test/results/clientpositive/spark/union6.q.out +++ b/ql/src/test/results/clientpositive/spark/union6.q.out @@ -60,16 +60,13 @@ STAGE PLANS: Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.tmptable + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tmptable Reducer 2 Reduce Operator Tree: Group By Operator @@ -79,16 +76,13 @@ STAGE PLANS: Select Operator expressions: 'tst1' (type: string), UDFToString(_col0) (type: string) outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.tmptable + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tmptable Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/spark/union_remove_1.q.out b/ql/src/test/results/clientpositive/spark/union_remove_1.q.out index 431389b..be6e0e9 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_1.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_1.q.out @@ -122,19 +122,13 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 + File Output Operator + compressed: false + table: + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 Reducer 5 Reduce Operator Tree: Group By Operator @@ -142,19 +136,13 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/spark/union_remove_10.q.out b/ql/src/test/results/clientpositive/spark/union_remove_10.q.out index c469494..98cad44 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_10.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_10.q.out @@ -89,14 +89,28 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Union 3 <- Map 4 (NONE, 0), Map 5 (NONE, 0), Reducer 2 (NONE, 0) + Reducer 4 <- Map 3 (GROUP, 1) + Union 2 <- Map 1 (NONE, 0), Map 5 (NONE, 0), Reducer 4 (NONE, 0) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: inputtbl1 + Select Operator + expressions: key (type: string), UDFToLong(1) (type: bigint) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.outputtbl1 + Map 3 + Map Operator Tree: + TableScan + alias: inputtbl1 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: key (type: string) @@ -114,23 +128,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col1 (type: bigint) - Map 4 - Map Operator Tree: - TableScan - alias: inputtbl1 - Select Operator - expressions: key (type: string), UDFToLong(1) (type: bigint) - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.outputtbl1 Map 5 Map Operator Tree: TableScan @@ -138,29 +135,6 @@ STAGE PLANS: Select Operator expressions: key (type: string), UDFToLong(2) (type: bigint) outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - 
output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.outputtbl1 - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 File Output Operator compressed: false table: @@ -168,8 +142,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.outputtbl1 - Union 3 - Vertex: Union 3 + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.outputtbl1 + Union 2 + Vertex: Union 2 Stage: Stage-6 Conditional Operator diff --git a/ql/src/test/results/clientpositive/spark/union_remove_11.q.out b/ql/src/test/results/clientpositive/spark/union_remove_11.q.out index 69aa4d3..4accb54 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_11.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_11.q.out @@ -97,7 +97,7 @@ STAGE PLANS: TableScan alias: inputtbl1 Select Operator - expressions: key (type: string), 2 (type: int) + expressions: key (type: string), 1 (type: int) outputColumnNames: _col0, _col1 Select Operator expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint) @@ -114,7 +114,7 @@ STAGE PLANS: TableScan alias: inputtbl1 Select Operator - expressions: key (type: string), 1 (type: int) + expressions: key (type: string), 2 (type: int) outputColumnNames: _col0, _col1 Select Operator expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint) diff --git a/ql/src/test/results/clientpositive/spark/union_remove_15.q.out b/ql/src/test/results/clientpositive/spark/union_remove_15.q.out index 6065810..e7b2cd9 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_15.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_15.q.out @@ -129,18 +129,15 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), '2' (type: string) + expressions: _col0 (type: string), _col1 (type: bigint), '1' (type: string) outputColumnNames: _col0, _col1, _col2 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.outputtbl1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.outputtbl1 Reducer 5 Reduce Operator Tree: Group By Operator @@ -149,18 
+146,15 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), '1' (type: string) + expressions: _col0 (type: string), _col1 (type: bigint), '2' (type: string) outputColumnNames: _col0, _col1, _col2 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.outputtbl1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.outputtbl1 Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/spark/union_remove_16.q.out b/ql/src/test/results/clientpositive/spark/union_remove_16.q.out index 1a19430..8838728 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_16.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_16.q.out @@ -132,18 +132,15 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), '2' (type: string) + expressions: _col0 (type: string), _col1 (type: bigint), '1' (type: string) outputColumnNames: _col0, _col1, _col2 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.outputtbl1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.outputtbl1 Reducer 5 Reduce Operator Tree: Group By Operator @@ -152,18 +149,15 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), '1' (type: string) + expressions: _col0 (type: string), _col1 (type: bigint), '2' (type: string) outputColumnNames: _col0, _col1, _col2 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.outputtbl1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.outputtbl1 Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/spark/union_remove_17.q.out b/ql/src/test/results/clientpositive/spark/union_remove_17.q.out index 995f6d3..823cbaf 100644 --- 
a/ql/src/test/results/clientpositive/spark/union_remove_17.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_17.q.out @@ -76,7 +76,7 @@ STAGE PLANS: TableScan alias: inputtbl1 Select Operator - expressions: key (type: string), 2 (type: int), '2' (type: string) + expressions: key (type: string), 1 (type: int), '1' (type: string) outputColumnNames: _col0, _col1, _col2 Select Operator expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint), _col2 (type: string) @@ -93,7 +93,7 @@ STAGE PLANS: TableScan alias: inputtbl1 Select Operator - expressions: key (type: string), 1 (type: int), '1' (type: string) + expressions: key (type: string), 2 (type: int), '2' (type: string) outputColumnNames: _col0, _col1, _col2 Select Operator expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint), _col2 (type: string) diff --git a/ql/src/test/results/clientpositive/spark/union_remove_18.q.out b/ql/src/test/results/clientpositive/spark/union_remove_18.q.out index 3524766..28f1b00 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_18.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_18.q.out @@ -129,16 +129,13 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col2 (type: bigint), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 Reducer 5 Reduce Operator Tree: Group By Operator @@ -149,16 +146,13 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col2 (type: bigint), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/spark/union_remove_19.q.out b/ql/src/test/results/clientpositive/spark/union_remove_19.q.out index d6d4add..301aad4 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_19.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_19.q.out @@ -126,19 +126,13 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - 
outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 Reducer 5 Reduce Operator Tree: Group By Operator @@ -146,19 +140,13 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 Union 3 Vertex: Union 3 @@ -503,23 +491,17 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) + expressions: (_col0 + _col0) (type: double), _col1 (type: bigint) outputColumnNames: _col0, _col1 - Select Operator - expressions: (_col0 + _col0) (type: double), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Filter Operator - predicate: (_col0 >= 7.0) (type: boolean) - Select Operator - expressions: _col0 (type: double), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 + Filter Operator + predicate: (_col0 >= 7.0) (type: boolean) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 Reducer 5 Reduce Operator Tree: Group By Operator @@ -528,23 +510,17 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) + expressions: (_col0 + _col0) (type: double), _col1 (type: bigint) outputColumnNames: _col0, _col1 - Select Operator - expressions: (_col0 + _col0) (type: double), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Filter Operator - predicate: (_col0 >= 7.0) (type: boolean) - Select Operator - expressions: _col0 (type: double), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: 
default.outputtbl1 + Filter Operator + predicate: (_col0 >= 7.0) (type: boolean) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/spark/union_remove_2.q.out b/ql/src/test/results/clientpositive/spark/union_remove_2.q.out index e0f84de..2b05b7b 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_2.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_2.q.out @@ -74,31 +74,14 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 4 <- Map 3 (GROUP, 1) - Union 2 <- Map 1 (NONE, 0), Map 5 (NONE, 0), Reducer 4 (NONE, 0) + Reducer 2 <- Map 1 (GROUP, 1) + Union 3 <- Map 4 (NONE, 0), Map 5 (NONE, 0), Reducer 2 (NONE, 0) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: inputtbl1 - Select Operator - expressions: key (type: string), UDFToLong(2) (type: bigint) - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - Map 3 - Map Operator Tree: - TableScan - alias: inputtbl1 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: key (type: string) @@ -116,35 +99,26 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col1 (type: bigint) - Map 5 + Map 4 Map Operator Tree: TableScan alias: inputtbl1 Select Operator expressions: key (type: string), UDFToLong(1) (type: bigint) outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - Reducer 4 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + Map 5 + Map Operator Tree: + TableScan + alias: inputtbl1 Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) + expressions: key (type: string), UDFToLong(2) (type: bigint) outputColumnNames: _col0, _col1 File Output Operator compressed: false @@ -153,8 +127,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 - Union 2 - Vertex: Union 2 + Reducer 2 + Reduce Operator 
Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + Union 3 + Vertex: Union 3 Stage: Stage-0 Move Operator diff --git a/ql/src/test/results/clientpositive/spark/union_remove_20.q.out b/ql/src/test/results/clientpositive/spark/union_remove_20.q.out index b88d683..c67f47b 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_20.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_20.q.out @@ -125,18 +125,15 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) + expressions: _col1 (type: bigint), _col0 (type: string) outputColumnNames: _col0, _col1 - Select Operator - expressions: _col1 (type: bigint), _col0 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 Reducer 5 Reduce Operator Tree: Group By Operator @@ -145,18 +142,15 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) + expressions: _col1 (type: bigint), _col0 (type: string) outputColumnNames: _col0, _col1 - Select Operator - expressions: _col1 (type: bigint), _col0 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/spark/union_remove_24.q.out b/ql/src/test/results/clientpositive/spark/union_remove_24.q.out index fdc79e0..5ed88b4 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_24.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_24.q.out @@ -121,18 +121,15 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Select Operator - expressions: UDFToDouble(UDFToLong(_col0)) (type: double), _col1 (type: bigint) + expressions: UDFToDouble(_col0) (type: double), _col1 (type: bigint) outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: double), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 Reducer 5 Reduce Operator Tree: Group By Operator @@ -141,18 +138,15 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Select Operator - expressions: UDFToDouble(_col0) (type: double), _col1 (type: bigint) + expressions: UDFToDouble(UDFToLong(_col0)) (type: double), _col1 (type: bigint) outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: double), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/spark/union_remove_25.q.out b/ql/src/test/results/clientpositive/spark/union_remove_25.q.out index e4724e4..9445080 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_25.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_25.q.out @@ -138,19 +138,13 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 Reducer 5 Reduce Operator Tree: Group By Operator @@ -158,19 +152,13 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 
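The union_remove_*.q.out hunks above and below all record the same two planner effects on the Spark plans: identity Select Operators (projections that pass _col0, _col1 through unchanged) are folded away between the mergepartial Group By and the File Output Operator, and the Map/Reducer/Union vertex numbers shift as a result. As a rough sketch, the query shape these golden files exercise looks like the following (table and column names follow the plans themselves — inputtbl1, outputtbl1, the UDFToLong(1)/UDFToLong(2) constant branches — but this is illustrative, not copied from the .q files):

-- illustrative shape only; assumes hive.optimize.union.remove=true as in the union_remove tests
INSERT OVERWRITE TABLE outputTbl1
SELECT * FROM (
  SELECT key, COUNT(1) AS `values` FROM inputTbl1 GROUP BY key
  UNION ALL
  SELECT key, 1 AS `values` FROM inputTbl1
  UNION ALL
  SELECT key, 2 AS `values` FROM inputTbl1
) a;

Under that shape only the aggregated branch needs a reducer; the constant branches write straight from their map vertices, which is why each before/after pair differs only in dropped no-op projections and renumbered vertices while the File Output Operators, serdes, and target table stay identical.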
Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/spark/union_remove_3.q.out b/ql/src/test/results/clientpositive/spark/union_remove_3.q.out index e971b0c..09b8636 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_3.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_3.q.out @@ -82,7 +82,7 @@ STAGE PLANS: TableScan alias: inputtbl1 Select Operator - expressions: key (type: string), 3 (type: int) + expressions: key (type: string), 1 (type: int) outputColumnNames: _col0, _col1 Select Operator expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint) @@ -99,7 +99,7 @@ STAGE PLANS: TableScan alias: inputtbl1 Select Operator - expressions: key (type: string), 1 (type: int) + expressions: key (type: string), 2 (type: int) outputColumnNames: _col0, _col1 Select Operator expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint) @@ -116,7 +116,7 @@ STAGE PLANS: TableScan alias: inputtbl1 Select Operator - expressions: key (type: string), 2 (type: int) + expressions: key (type: string), 3 (type: int) outputColumnNames: _col0, _col1 Select Operator expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint) diff --git a/ql/src/test/results/clientpositive/spark/union_remove_4.q.out b/ql/src/test/results/clientpositive/spark/union_remove_4.q.out index e5d618a..65d2aa1 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_4.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_4.q.out @@ -127,19 +127,13 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 Reducer 5 Reduce Operator Tree: Group By Operator @@ -147,19 +141,13 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/spark/union_remove_5.q.out b/ql/src/test/results/clientpositive/spark/union_remove_5.q.out index e5a2c3e..41271b2 100644 --- 
a/ql/src/test/results/clientpositive/spark/union_remove_5.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_5.q.out @@ -81,31 +81,14 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 4 <- Map 3 (GROUP, 1) - Union 2 <- Map 1 (NONE, 0), Map 5 (NONE, 0), Reducer 4 (NONE, 0) + Reducer 2 <- Map 1 (GROUP, 1) + Union 3 <- Map 4 (NONE, 0), Map 5 (NONE, 0), Reducer 2 (NONE, 0) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: inputtbl1 - Select Operator - expressions: key (type: string), UDFToLong(2) (type: bigint) - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - Map 3 - Map Operator Tree: - TableScan - alias: inputtbl1 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: key (type: string) @@ -123,35 +106,26 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col1 (type: bigint) - Map 5 + Map 4 Map Operator Tree: TableScan alias: inputtbl1 Select Operator expressions: key (type: string), UDFToLong(1) (type: bigint) outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - Reducer 4 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + Map 5 + Map Operator Tree: + TableScan + alias: inputtbl1 Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) + expressions: key (type: string), UDFToLong(2) (type: bigint) outputColumnNames: _col0, _col1 File Output Operator compressed: false @@ -160,8 +134,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 - Union 2 - Vertex: Union 2 + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + Union 3 + Vertex: Union 3 Stage: Stage-6 Conditional Operator diff --git 
a/ql/src/test/results/clientpositive/spark/union_remove_6.q.out b/ql/src/test/results/clientpositive/spark/union_remove_6.q.out index 3939c47..d7cd40b 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_6.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_6.q.out @@ -125,29 +125,20 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl2 Reducer 5 Reduce Operator Tree: Group By Operator @@ -155,29 +146,20 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl2 Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/spark/union_remove_7.q.out 
b/ql/src/test/results/clientpositive/spark/union_remove_7.q.out index 018fadc..b3be932 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_7.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_7.q.out @@ -126,19 +126,13 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.outputtbl1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.outputtbl1 Reducer 5 Reduce Operator Tree: Group By Operator @@ -146,19 +140,13 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.outputtbl1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.outputtbl1 Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/spark/union_remove_8.q.out b/ql/src/test/results/clientpositive/spark/union_remove_8.q.out index c964d49..8773535 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_8.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_8.q.out @@ -78,31 +78,14 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 4 <- Map 3 (GROUP, 1) - Union 2 <- Map 1 (NONE, 0), Map 5 (NONE, 0), Reducer 4 (NONE, 0) + Reducer 2 <- Map 1 (GROUP, 1) + Union 3 <- Map 4 (NONE, 0), Map 5 (NONE, 0), Reducer 2 (NONE, 0) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: inputtbl1 - Select Operator - expressions: key (type: string), UDFToLong(2) (type: bigint) - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.outputtbl1 - Map 3 - Map Operator Tree: - TableScan - alias: inputtbl1 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: key (type: string) @@ -120,35 +103,26 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: 
NONE value expressions: _col1 (type: bigint) - Map 5 + Map 4 Map Operator Tree: TableScan alias: inputtbl1 Select Operator expressions: key (type: string), UDFToLong(1) (type: bigint) outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.outputtbl1 - Reducer 4 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.outputtbl1 + Map 5 + Map Operator Tree: + TableScan + alias: inputtbl1 Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) + expressions: key (type: string), UDFToLong(2) (type: bigint) outputColumnNames: _col0, _col1 File Output Operator compressed: false @@ -157,8 +131,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.outputtbl1 - Union 2 - Vertex: Union 2 + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.outputtbl1 + Union 3 + Vertex: Union 3 Stage: Stage-0 Move Operator diff --git a/ql/src/test/results/clientpositive/spark/union_remove_9.q.out b/ql/src/test/results/clientpositive/spark/union_remove_9.q.out index f93bb1d..8dc6dd8 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_9.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_9.q.out @@ -85,34 +85,14 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 4 <- Map 3 (GROUP, 1) - Union 2 <- Map 1 (NONE, 0), Map 5 (NONE, 0), Reducer 4 (NONE, 0) + Reducer 2 <- Map 1 (GROUP, 1) + Union 3 <- Map 4 (NONE, 0), Map 5 (NONE, 0), Reducer 2 (NONE, 0) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: inputtbl1 - Select Operator - expressions: key (type: string), 1 (type: int) - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint) - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.outputtbl1 - Map 3 - Map Operator Tree: - TableScan - alias: inputtbl1 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Select Operator 
expressions: key (type: string) @@ -130,6 +110,23 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col1 (type: bigint) + Map 4 + Map Operator Tree: + TableScan + alias: inputtbl1 + Select Operator + expressions: key (type: string), 1 (type: int) + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.outputtbl1 Map 5 Map Operator Tree: TableScan @@ -140,38 +137,29 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint) outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.outputtbl1 - Reducer 4 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.outputtbl1 + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.outputtbl1 - Union 2 - Vertex: Union 2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.outputtbl1 + Union 3 + Vertex: Union 3 Stage: Stage-6 Conditional Operator diff --git a/ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out b/ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out index e43a0ca..339dcfb 100644 --- a/ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out +++ b/ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out @@ -152,22 +152,18 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: double), _col2 (type: double), _col3 (type: decimal(14,4)) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 524 
Data size: 155436 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: decimal(14,4)) + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: decimal(14,4)) Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: decimal(14,4)) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/spark/vector_decimal_mapjoin.q.out b/ql/src/test/results/clientpositive/spark/vector_decimal_mapjoin.q.out index 716c545..7ec1675 100644 --- a/ql/src/test/results/clientpositive/spark/vector_decimal_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/spark/vector_decimal_mapjoin.q.out @@ -40,7 +40,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: r @@ -62,7 +62,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: l @@ -81,7 +81,7 @@ STAGE PLANS: 1 6981 (type: int) outputColumnNames: _col1, _col9 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 6758 Data size: 1190783 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: 6981 (type: int), 6981 (type: int), _col1 (type: decimal(20,10)), _col9 (type: decimal(23,14)) diff --git a/ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out b/ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out index 2b58561..293684a 100644 --- a/ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out +++ b/ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out @@ -26,42 +26,42 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan - alias: hd + alias: cd Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 + 0 {ctinyint} 1 keys: - 0 _col0 (type: tinyint) - 1 ctinyint (type: tinyint) + 0 cint (type: int) + 1 cint (type: int) Local Work: Map Reduce Local Work Map 4 Map Operator Tree: TableScan - alias: cd + alias: hd Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {ctinyint} + 0 1 keys: - 0 cint (type: int) - 1 cint (type: int) + 0 _col0 (type: tinyint) + 1 ctinyint (type: tinyint) Local Work: Map Reduce Local Work Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: c @@ -77,7 +77,7 @@ STAGE PLANS: 1 cint (type: int) outputColumnNames: _col0 input vertices: - 1 Map 4 + 1 Map 3 Statistics: Num rows: 13516 Data size: 414960 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -89,23 +89,21 @@ STAGE PLANS: 0 
_col0 (type: tinyint) 1 ctinyint (type: tinyint) input vertices: - 1 Map 1 + 1 Map 4 Statistics: Num rows: 14867 Data size: 456456 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 14867 Data size: 456456 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Execution mode: vectorized - Reducer 3 + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out b/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out index 1927626..1ba65bc 100644 --- a/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out +++ b/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out @@ -32,7 +32,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: li @@ -79,10 +79,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: lineitem @@ -90,21 +90,17 @@ STAGE PLANS: Filter Operator predicate: l_partkey is not null (type: boolean) Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: l_partkey (type: int) - outputColumnNames: l_partkey + Group By Operator + keys: l_partkey (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: l_partkey (type: int) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - Reducer 3 + Reducer 2 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -112,48 +108,44 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 {l_orderkey} {l_suppkey} + keys: + 0 _col0 (type: int) + 1 l_partkey (type: int) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 1 Map 3 + Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: - Inner Join 0 
to 1 + Left Semi Join 0 to 1 condition expressions: - 0 {_col0} - 1 {l_orderkey} {l_suppkey} + 0 {_col0} {_col3} + 1 keys: - 0 _col0 (type: int) - 1 l_partkey (type: int) - outputColumnNames: _col0, _col1, _col3 + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col3 input vertices: - 1 Map 1 - Statistics: Num rows: 27 Data size: 3298 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - condition expressions: - 0 {_col0} {_col3} - 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col3 - input vertices: - 1 Map 4 - Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col3 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 1 Map 4 + Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col3 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized Stage: Stage-0 @@ -210,7 +202,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: li @@ -257,10 +249,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 3) + Reducer 2 <- Map 1 (GROUP, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: lineitem @@ -268,21 +260,17 @@ STAGE PLANS: Filter Operator predicate: l_partkey is not null (type: boolean) Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: l_partkey (type: int) - outputColumnNames: l_partkey + Group By Operator + keys: l_partkey (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: l_partkey (type: int) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - Reducer 3 + Reducer 2 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -290,48 +278,44 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - 
Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 {l_orderkey} {l_suppkey} + keys: + 0 _col0 (type: int) + 1 l_partkey (type: int) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 1 Map 3 + Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 condition expressions: - 0 {_col0} - 1 {l_orderkey} {l_suppkey} + 0 {_col0} {_col3} + 1 keys: - 0 _col0 (type: int) - 1 l_partkey (type: int) - outputColumnNames: _col0, _col1, _col3 + 0 _col1 (type: int), 1 (type: int) + 1 _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col3 input vertices: - 1 Map 1 - Statistics: Num rows: 27 Data size: 3298 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - condition expressions: - 0 {_col0} {_col3} - 1 - keys: - 0 _col1 (type: int), 1 (type: int) - 1 _col0 (type: int), _col1 (type: int) - outputColumnNames: _col0, _col3 - input vertices: - 1 Map 4 - Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col3 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 1 Map 4 + Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col3 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized Stage: Stage-0 diff --git a/ql/src/test/results/clientpositive/spark/vector_orderby_5.q.out b/ql/src/test/results/clientpositive/spark/vector_orderby_5.q.out index 0992573..2afd353 100644 --- a/ql/src/test/results/clientpositive/spark/vector_orderby_5.q.out +++ b/ql/src/test/results/clientpositive/spark/vector_orderby_5.q.out @@ -148,26 +148,22 @@ STAGE PLANS: keys: KEY._col0 (type: boolean) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: boolean), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: boolean) - sort order: - - Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: 
_col0 (type: boolean) + sort order: - + Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Execution mode: vectorized Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git a/ql/src/test/results/clientpositive/spark/vectorization_0.q.out b/ql/src/test/results/clientpositive/spark/vectorization_0.q.out index 56d300f..f61ef7d 100644 --- a/ql/src/test/results/clientpositive/spark/vectorization_0.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorization_0.q.out @@ -54,15 +54,11 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: tinyint), _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: tinyint) - sort order: + - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint) + value expressions: _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized Reducer 3 Reduce Operator Tree: @@ -152,14 +148,10 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: bigint) - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Reducer 3 Reduce Operator Tree: @@ -259,15 +251,11 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: 
double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) + value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) Reducer 3 Reduce Operator Tree: Select Operator @@ -372,15 +360,11 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) + value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized Reducer 3 Reduce Operator Tree: @@ -470,14 +454,10 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: bigint) - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Reducer 3 Reduce Operator Tree: @@ -577,15 +557,11 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) + value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) Reducer 3 Reduce Operator Tree: Select Operator @@ -690,15 +666,11 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: float), _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + key expressions: _col0 (type: float) + sort order: + Statistics: Num rows: 1 Data size: 24 
Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: float) - sort order: + - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint) + value expressions: _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized Reducer 3 Reduce Operator Tree: @@ -788,14 +760,10 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: double) - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Reducer 3 Reduce Operator Tree: @@ -895,15 +863,11 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) + value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) Reducer 3 Reduce Operator Tree: Select Operator diff --git a/ql/src/test/results/clientpositive/spark/vectorized_bucketmapjoin1.q.out b/ql/src/test/results/clientpositive/spark/vectorized_bucketmapjoin1.q.out index 866e4c0..71052f6 100644 --- a/ql/src/test/results/clientpositive/spark/vectorized_bucketmapjoin1.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorized_bucketmapjoin1.q.out @@ -110,7 +110,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -132,7 +132,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -151,7 +151,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) @@ -204,7 +204,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -226,7 +226,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -245,7 +245,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: 
_col0, _col1, _col5, _col6 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) @@ -308,7 +308,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b @@ -330,7 +330,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a @@ -349,7 +349,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) diff --git a/ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out b/ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out index 28dcce8..1bafba5 100644 --- a/ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out @@ -16,7 +16,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: t2 @@ -37,10 +37,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: t1 @@ -59,25 +59,21 @@ STAGE PLANS: 1 cint (type: int) outputColumnNames: _col2, _col17 input vertices: - 1 Map 1 + 1 Map 3 Statistics: Num rows: 6758 Data size: 207479 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: int), _col17 (type: int) - outputColumnNames: _col2, _col17 - Statistics: Num rows: 6758 Data size: 207479 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(_col2), max(_col17), min(_col2), avg((_col2 + _col17)) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 + Group By Operator + aggregations: count(_col2), max(_col17), min(_col2), avg((_col2 + _col17)) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: struct) + value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: struct) Local Work: Map Reduce Local Work Execution mode: vectorized - Reducer 3 + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), avg(VALUE._col3) diff --git a/ql/src/test/results/clientpositive/spark/vectorized_nested_mapjoin.q.out b/ql/src/test/results/clientpositive/spark/vectorized_nested_mapjoin.q.out index 8ff0f01..f076f3b 100644 --- a/ql/src/test/results/clientpositive/spark/vectorized_nested_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorized_nested_mapjoin.q.out @@ -13,7 +13,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: v1 @@ -35,7 +35,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### 
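The vectorized_mapjoin hunk above only renumbers the Spark vertices and drops an identity Select Operator; the query shape itself is unchanged and can be read off the plan. A minimal HiveQL sketch of that query, assuming the underlying table is alltypesorc (the t1/t2 aliases, the cint join key, and the aggregate list are taken from the plan; the table name is not shown in this excerpt):

    SELECT COUNT(t1.cint),           -- count(_col2) in the plan
           MAX(t2.cint),             -- max(_col17)
           MIN(t1.cint),             -- min(_col2)
           AVG(t1.cint + t2.cint)    -- avg((_col2 + _col17))
    FROM alltypesorc t1
    JOIN alltypesorc t2 ON t1.cint = t2.cint;

Under the converted map join, the hash-mode Group By feeds a single GROUP reducer directly, which is why the Select Operator removed in this hunk (expressions and outputColumnNames both _col2, _col17) was a pure pass-through.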
Vertices: - Map 4 + Map 2 Map Operator Tree: TableScan alias: v2 @@ -54,7 +54,7 @@ STAGE PLANS: 1 ctinyint (type: tinyint) outputColumnNames: _col0, _col1, _col5, _col15 input vertices: - 0 Map 3 + 0 Map 1 Statistics: Num rows: 6758 Data size: 207479 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col0 = _col15) (type: boolean) @@ -76,10 +76,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 4 <- Map 3 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: v3 @@ -98,25 +98,21 @@ STAGE PLANS: 1 csmallint (type: smallint) outputColumnNames: _col1 input vertices: - 0 Map 4 + 0 Map 2 Statistics: Num rows: 6758 Data size: 207479 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: double) - outputColumnNames: _col1 - Statistics: Num rows: 6758 Data size: 207479 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col1) - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: sum(_col1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: double) + value expressions: _col0 (type: double) Local Work: Map Reduce Local Work Execution mode: vectorized - Reducer 2 + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out b/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out index 5fe0020..5a29f76 100644 --- a/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out @@ -616,18 +616,14 @@ STAGE PLANS: 1 Map 4 Position of Big Table: 0 Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - auto parallelism: false + tag: -1 + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -1947,7 +1943,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### 
Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: p1 @@ -2020,10 +2016,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 3) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: part_orc @@ -2086,7 +2082,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] - Reducer 3 + Reducer 2 Local Work: Map Reduce Local Work Needs Tagging: false @@ -2114,7 +2110,7 @@ STAGE PLANS: 1 p_partkey (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 input vertices: - 1 Map 1 + 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -4149,7 +4145,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: p1 @@ -4222,11 +4218,11 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 3) - Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 3) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: part_orc @@ -4289,7 +4285,7 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] - Reducer 3 + Reducer 2 Local Work: Map Reduce Local Work Needs Tagging: false @@ -4317,7 +4313,7 @@ STAGE PLANS: 1 p_partkey (type: int) outputColumnNames: _col1, _col2, _col5, _col7 input vertices: - 1 Map 1 + 1 Map 4 Position of Big Table: 0 Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -4328,7 +4324,7 @@ STAGE PLANS: tag: -1 value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) auto parallelism: false - Reducer 4 + Reducer 3 Needs Tagging: false Reduce Operator Tree: Extract @@ -4836,32 +4832,28 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col0 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double) - auto parallelism: false + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col0 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double) + auto parallelism: false Execution mode: vectorized Reducer 3 Needs Tagging: false Reduce Operator Tree: Extract - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF 
Operator - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double) auto parallelism: false @@ -4869,19 +4861,19 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), _wcol0 (type: double) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git a/ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out b/ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out index 28dcce8..1bafba5 100644 --- a/ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out @@ -16,7 +16,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: t2 @@ -37,10 +37,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: t1 @@ -59,25 +59,21 @@ STAGE PLANS: 1 cint (type: int) outputColumnNames: _col2, _col17 input vertices: - 1 Map 1 + 1 Map 3 Statistics: Num rows: 6758 Data size: 207479 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: int), _col17 (type: int) - outputColumnNames: _col2, _col17 - Statistics: Num rows: 6758 Data size: 207479 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(_col2), max(_col17), min(_col2), avg((_col2 + _col17)) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 + Group By Operator + aggregations: count(_col2), max(_col17), min(_col2), avg((_col2 + _col17)) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 
(type: int), _col2 (type: int), _col3 (type: struct) + value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: struct) Local Work: Map Reduce Local Work Execution mode: vectorized - Reducer 3 + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), avg(VALUE._col3) diff --git a/ql/src/test/results/clientpositive/split_sample.q.out b/ql/src/test/results/clientpositive/split_sample.q.out deleted file mode 100644 index 3a81f87..0000000 --- a/ql/src/test/results/clientpositive/split_sample.q.out +++ /dev/null @@ -1,4864 +0,0 @@ -PREHOOK: query: USE default -PREHOOK: type: SWITCHDATABASE -POSTHOOK: query: USE default -POSTHOOK: type: SWITCHDATABASE -PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) --- This test sets mapred.max.split.size=300 and hive.merge.smallfiles.avgsize=1 --- in an attempt to force the generation of multiple splits and multiple output files. --- However, Hadoop 0.20 is incapable of generating splits smaller than the block size --- when using CombineFileInputFormat, so only one split is generated. This has a --- significant impact on the results of the TABLESAMPLE(x PERCENT). This issue was --- fixed in MAPREDUCE-2046 which is included in 0.22. - --- create multiple file inputs (two enable multiple splits) -create table ss_i_part (key int, value string) partitioned by (p string) -PREHOOK: type: CREATETABLE -POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) --- This test sets mapred.max.split.size=300 and hive.merge.smallfiles.avgsize=1 --- in an attempt to force the generation of multiple splits and multiple output files. --- However, Hadoop 0.20 is incapable of generating splits smaller than the block size --- when using CombineFileInputFormat, so only one split is generated. This has a --- significant impact on the results of the TABLESAMPLE(x PERCENT). This issue was --- fixed in MAPREDUCE-2046 which is included in 0.22. 
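The split_sample.q.out file being deleted here exercised split-level percent sampling. A minimal sketch of the setup its header comment describes, assuming the two SET commands match the settings named in that comment (the .q.out itself does not echo them):

    -- force many small splits so TABLESAMPLE(x PERCENT) has several to choose from
    set mapred.max.split.size=300;
    set hive.merge.smallfiles.avgsize=1;
    -- percent sampling selects whole input splits, so on Hadoop 0.20
    -- (one split per file, per the comment above) it falls back to reading entire files
    select count(1) from ss_src2 tablesample(1 percent);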
- --- create multiple file inputs (two enable multiple splits) -create table ss_i_part (key int, value string) partitioned by (p string) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: default@ss_i_part -PREHOOK: query: insert overwrite table ss_i_part partition (p='1') select key, value from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@ss_i_part@p=1 -POSTHOOK: query: insert overwrite table ss_i_part partition (p='1') select key, value from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@ss_i_part@p=1 -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: insert overwrite table ss_i_part partition (p='2') select key, value from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@ss_i_part@p=2 -POSTHOOK: query: insert overwrite table ss_i_part partition (p='2') select key, value from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@ss_i_part@p=2 -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: insert overwrite table ss_i_part partition (p='3') select key, value from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@ss_i_part@p=3 -POSTHOOK: query: insert overwrite table ss_i_part partition (p='3') select key, value from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@ss_i_part@p=3 -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: create table ss_src2 as select key, value from ss_i_part -PREHOOK: type: CREATETABLE_AS_SELECT -PREHOOK: Input: default@ss_i_part -PREHOOK: Input: default@ss_i_part@p=1 -PREHOOK: Input: default@ss_i_part@p=2 -PREHOOK: Input: default@ss_i_part@p=3 -POSTHOOK: query: create table ss_src2 as select key, value from ss_i_part -POSTHOOK: type: CREATETABLE_AS_SELECT -POSTHOOK: Input: default@ss_i_part -POSTHOOK: Input: default@ss_i_part@p=1 -POSTHOOK: Input: default@ss_i_part@p=2 -POSTHOOK: Input: default@ss_i_part@p=3 -POSTHOOK: Output: default@ss_src2 -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part 
PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: select count(1) from ss_src2 tablesample(1 percent) -PREHOOK: type: QUERY -PREHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from ss_src2 tablesample(1 percent) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -500 -PREHOOK: query: -- sample first split -desc ss_src2 -PREHOOK: type: DESCTABLE -POSTHOOK: query: -- sample first split -desc ss_src2 -POSTHOOK: type: DESCTABLE -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -key int None -value string None -PREHOOK: query: explain select key, value from ss_src2 tablesample(1 percent) limit 10 -PREHOOK: type: QUERY -POSTHOOK: query: explain select key, value from ss_src2 tablesample(1 percent) limit 10 -POSTHOOK: type: QUERY -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE 
[(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME ss_src2) (TOK_TABLESPLITSAMPLE TOK_PERCENT 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_LIMIT 10))) - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: - ss_src2 - TableScan - alias: ss_src2 - Select Operator - expressions: - expr: key - type: int - expr: value - type: string - outputColumnNames: _col0, _col1 - Limit - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - Split Sample: - ss_src2 - percentage: 1.0 - seed number: 0 - - Stage: Stage-0 - Fetch Operator - limit: 10 - - -PREHOOK: query: select key, value from ss_src2 tablesample(1 percent) limit 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: query: select key, value from ss_src2 tablesample(1 percent) limit 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -238 val_238 -86 val_86 -311 val_311 -27 val_27 -165 val_165 -409 val_409 -255 val_255 -278 val_278 -98 val_98 -484 val_484 -PREHOOK: query: -- verify seed number of sampling -insert overwrite table ss_i_part partition (p='1') select key+10000, value from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@ss_i_part@p=1 -POSTHOOK: query: -- verify seed number of sampling -insert overwrite table ss_i_part partition (p='1') select key+10000, value from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@ss_i_part@p=1 -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] 
-POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: insert overwrite table ss_i_part partition (p='2') select key+20000, value from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@ss_i_part@p=2 -POSTHOOK: query: insert overwrite table ss_i_part partition (p='2') select key+20000, value from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@ss_i_part@p=2 -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: insert overwrite table ss_i_part partition (p='3') select key+30000, value from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@ss_i_part@p=3 -POSTHOOK: query: insert overwrite table ss_i_part partition (p='3') select key+30000, value from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@ss_i_part@p=3 -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: 
ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: create table ss_src3 as select key, value from ss_i_part -PREHOOK: type: CREATETABLE_AS_SELECT -PREHOOK: Input: default@ss_i_part -PREHOOK: Input: default@ss_i_part@p=1 -PREHOOK: Input: default@ss_i_part@p=2 -PREHOOK: Input: default@ss_i_part@p=3 -POSTHOOK: query: create table ss_src3 as select key, value from ss_i_part -POSTHOOK: type: CREATETABLE_AS_SELECT -POSTHOOK: Input: default@ss_i_part -POSTHOOK: Input: default@ss_i_part@p=1 -POSTHOOK: Input: default@ss_i_part@p=2 -POSTHOOK: Input: default@ss_i_part@p=3 -POSTHOOK: Output: default@ss_src3 -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: create table ss_t3 as select sum(key) % 397 as s from ss_src3 tablesample(1 percent) limit 10 -PREHOOK: type: CREATETABLE_AS_SELECT -PREHOOK: Input: default@ss_src3 -POSTHOOK: query: create table ss_t3 as select sum(key) % 397 as s from ss_src3 tablesample(1 percent) limit 10 -POSTHOOK: type: CREATETABLE_AS_SELECT -POSTHOOK: Input: default@ss_src3 -POSTHOOK: Output: default@ss_t3 -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, 
comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: create table ss_t4 as select sum(key) % 397 as s from ss_src3 tablesample(1 percent) limit 10 -PREHOOK: type: CREATETABLE_AS_SELECT -PREHOOK: Input: default@ss_src3 -POSTHOOK: query: create table ss_t4 as select sum(key) % 397 as s from ss_src3 tablesample(1 percent) limit 10 -POSTHOOK: type: CREATETABLE_AS_SELECT -POSTHOOK: Input: default@ss_src3 -POSTHOOK: Output: default@ss_t4 -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: create table ss_t5 as select sum(key) % 397 as s from ss_src3 tablesample(1 percent) limit 10 -PREHOOK: type: CREATETABLE_AS_SELECT -PREHOOK: Input: default@ss_src3 -POSTHOOK: query: create table ss_t5 as select sum(key) % 397 as s from ss_src3 tablesample(1 percent) limit 10 -POSTHOOK: type: CREATETABLE_AS_SELECT -POSTHOOK: Input: default@ss_src3 -POSTHOOK: Output: default@ss_t5 -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, 
comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: select sum(s) from (select s from ss_t3 union all select s from ss_t4 union all select s from ss_t5) t -PREHOOK: type: QUERY -PREHOOK: Input: default@ss_t3 -PREHOOK: Input: default@ss_t4 -PREHOOK: Input: default@ss_t5 -#### A masked pattern was here #### -POSTHOOK: query: select sum(s) from (select s from ss_t3 union all select s from ss_t4 union all select s from ss_t5) t -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ss_t3 -POSTHOOK: Input: default@ss_t4 -POSTHOOK: Input: default@ss_t5 -#### A masked pattern was here #### -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -320 -PREHOOK: query: -- sample more than one split -explain select count(distinct key) from ss_src2 tablesample(70 percent) limit 10 -PREHOOK: type: QUERY -POSTHOOK: query: -- sample more than one split -explain select count(distinct key) from ss_src2 tablesample(70 percent) limit 10 -POSTHOOK: type: QUERY -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value 
SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME ss_src2) (TOK_TABLESPLITSAMPLE TOK_PERCENT 70))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL key)))) (TOK_LIMIT 10))) - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: - ss_src2 - TableScan - alias: ss_src2 - Select Operator - expressions: - expr: key - type: int - outputColumnNames: key - Group By Operator - aggregations: - expr: count(DISTINCT key) - bucketGroup: false - keys: - expr: key - type: int - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: - expr: _col0 - type: int - sort order: + - tag: -1 - value expressions: - expr: _col1 - type: bigint - Split Sample: - ss_src2 - percentage: 70.0 - seed number: 5 - Reduce Operator Tree: - Group By Operator - aggregations: - expr: count(DISTINCT KEY._col0:0._col0) - bucketGroup: false - mode: mergepartial - outputColumnNames: _col0 - Select Operator - expressions: - expr: _col0 - type: bigint - outputColumnNames: _col0 - Limit - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - Stage: Stage-0 - Fetch Operator - limit: 10 - - -PREHOOK: query: select count(distinct key) from ss_src2 tablesample(70 percent) limit 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: query: select count(distinct key) from ss_src2 tablesample(70 percent) limit 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part 
PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -309 -PREHOOK: query: -- sample all splits -select count(1) from ss_src2 tablesample(100 percent) -PREHOOK: type: QUERY -PREHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: query: -- sample all splits -select count(1) from ss_src2 tablesample(100 percent) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -1500 -PREHOOK: query: -- subquery -explain select key from (select key from ss_src2 tablesample(1 percent) limit 10) subq -PREHOOK: type: QUERY -POSTHOOK: query: -- subquery -explain select key from (select key from ss_src2 tablesample(1 percent) limit 10) subq -POSTHOOK: type: QUERY -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part 
PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME ss_src2) (TOK_TABLESPLITSAMPLE TOK_PERCENT 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))) (TOK_LIMIT 10))) subq)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))))) - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: - subq:ss_src2 - TableScan - alias: ss_src2 - Select Operator - expressions: - expr: key - type: int - outputColumnNames: _col0 - Limit - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: int - Split Sample: - subq:ss_src2 - percentage: 1.0 - seed number: 5 - Reduce Operator Tree: - Extract - Limit - Select Operator - expressions: - expr: _col0 - type: int - outputColumnNames: _col0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - Stage: Stage-0 - Fetch Operator - limit: -1 - - -PREHOOK: query: select key from (select key from ss_src2 tablesample(1 percent) limit 10) subq -PREHOOK: type: QUERY -PREHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: query: select key from (select key from ss_src2 tablesample(1 percent) limit 10) subq -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, 
type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -238 -86 -311 -27 -165 -409 -255 -278 -98 -484 -PREHOOK: query: -- groupby -select key, count(1) from ss_src2 tablesample(1 percent) group by key order by key -PREHOOK: type: QUERY -PREHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: query: -- groupby -select key, count(1) from ss_src2 tablesample(1 percent) group by key order by key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -0 3 -2 1 -4 1 -5 3 -8 1 -9 1 -10 1 -11 1 -12 2 -15 2 -17 1 -18 2 -19 1 -20 1 -24 2 -26 2 -27 1 -28 1 -30 1 -33 1 -34 1 -35 3 -37 2 -41 1 -42 2 -43 1 -44 1 -47 1 -51 2 -53 1 -54 1 -57 1 -58 2 -64 1 -65 1 -66 1 -67 2 -69 1 -70 3 -72 2 -74 1 -76 2 -77 1 -78 1 -80 1 -82 1 -83 2 -84 2 -85 1 -86 1 -87 1 -90 3 -92 1 -95 2 -96 1 -97 2 -98 2 -100 2 -103 2 -104 2 -105 1 -111 1 -113 2 -114 1 -116 1 -118 2 -119 3 -120 2 -125 2 -126 1 -128 3 -129 2 -131 1 -133 1 -134 2 -136 1 -137 2 -138 4 -143 1 -145 1 -146 2 -149 2 -150 1 -152 2 -153 1 -155 1 -156 1 -157 1 -158 1 -160 1 -162 1 -163 1 -164 2 -165 2 -166 1 -167 3 -168 1 -169 4 -170 1 -172 2 -174 2 -175 2 -176 2 -177 1 -178 1 -179 2 -180 1 -181 1 -183 1 -186 1 -187 3 -189 1 -190 1 -191 2 -192 1 -193 3 -194 1 -195 2 -196 1 -197 2 -199 3 -200 2 -201 1 -202 1 -203 2 -205 2 -207 2 -208 3 -209 2 -213 2 -214 1 -216 2 -217 2 -218 1 -219 2 -221 2 -222 1 -223 2 -224 2 -226 1 -228 1 -229 2 -230 5 -233 2 -235 1 -237 2 -238 2 -239 2 -241 1 -242 2 -244 1 -247 1 -248 1 -249 1 -252 1 -255 2 -256 2 -257 1 -258 1 -260 1 -262 1 
-263 1 -265 2 -266 1 -272 2 -273 3 -274 1 -275 1 -277 4 -278 2 -280 2 -281 2 -282 2 -283 1 -284 1 -285 1 -286 1 -287 1 -288 2 -289 1 -291 1 -292 1 -296 1 -298 3 -302 1 -305 1 -306 1 -307 2 -308 1 -309 2 -310 1 -311 3 -315 1 -316 3 -317 2 -318 3 -321 2 -322 2 -323 1 -325 2 -327 3 -331 2 -332 1 -333 2 -335 1 -336 1 -338 1 -339 1 -341 1 -342 2 -344 2 -345 1 -348 5 -351 1 -353 2 -356 1 -360 1 -362 1 -364 1 -365 1 -366 1 -367 2 -368 1 -369 3 -373 1 -374 1 -375 1 -377 1 -378 1 -379 1 -382 2 -384 3 -386 1 -389 1 -392 1 -393 1 -394 1 -395 2 -396 3 -397 2 -399 2 -400 1 -401 5 -402 1 -403 3 -404 2 -406 4 -407 1 -409 3 -411 1 -413 2 -414 2 -417 3 -418 1 -419 1 -421 1 -424 2 -427 1 -429 2 -430 3 -431 3 -432 1 -435 1 -436 1 -437 1 -438 3 -439 2 -443 1 -444 1 -446 1 -448 1 -449 1 -452 1 -453 1 -454 3 -455 1 -457 1 -458 2 -459 2 -460 1 -462 2 -463 2 -466 3 -467 1 -468 4 -469 5 -470 1 -472 1 -475 1 -477 1 -478 2 -479 1 -480 3 -481 1 -482 1 -483 1 -484 1 -485 1 -487 1 -489 4 -490 1 -491 1 -492 2 -493 1 -494 1 -495 1 -496 1 -497 1 -498 3 -PREHOOK: query: -- sample one of two tables: -create table ss_src1 as select * from ss_src2 -PREHOOK: type: CREATETABLE_AS_SELECT -PREHOOK: Input: default@ss_src2 -POSTHOOK: query: -- sample one of two tables: -create table ss_src1 as select * from ss_src2 -POSTHOOK: type: CREATETABLE_AS_SELECT -POSTHOOK: Input: default@ss_src2 -POSTHOOK: Output: default@ss_src1 -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: select t2.key as k from ss_src1 join ss_src2 tablesample(1 percent) t2 on ss_src1.key=t2.key order by k -PREHOOK: type: QUERY -PREHOOK: Input: default@ss_src1 -PREHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: query: select t2.key as k from ss_src1 join ss_src2 tablesample(1 percent) t2 on ss_src1.key=t2.key order by k -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ss_src1 -POSTHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part 
PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -2 -2 -2 -4 -4 -4 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -8 -8 -8 -9 -9 -9 -10 -10 -10 -11 -11 -11 -12 -12 -12 -12 -12 -12 -12 -12 -12 -12 -12 -12 -15 -15 -15 -15 -15 -15 -15 -15 -15 -15 -15 -15 -17 -17 -17 -18 -18 -18 -18 -18 -18 -18 -18 -18 -18 -18 -18 -19 -19 -19 -20 -20 -20 -24 -24 -24 -24 -24 -24 -24 -24 -24 -24 -24 -24 -26 -26 -26 -26 -26 -26 -26 -26 -26 -26 -26 -26 -27 -27 -27 -28 -28 -28 -30 -30 -30 -33 -33 -33 -34 -34 -34 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -37 -37 -37 -37 -37 -37 -37 -37 -37 -37 -37 -37 -41 -41 -41 -42 -42 -42 -42 -42 -42 -42 -42 -42 -42 -42 -42 -43 -43 -43 -44 -44 -44 -47 -47 -47 -51 -51 -51 -51 -51 -51 -51 -51 -51 -51 -51 -51 -53 -53 -53 -54 -54 -54 -57 -57 -57 -58 -58 -58 -58 -58 -58 -58 -58 -58 -58 -58 -58 -64 -64 -64 -65 -65 -65 -66 -66 -66 -67 -67 -67 -67 -67 -67 -67 -67 -67 -67 -67 -67 -69 -69 -69 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -72 -72 -72 -72 -72 -72 -72 -72 -72 -72 -72 -72 -74 -74 -74 -76 -76 -76 -76 -76 -76 -76 -76 -76 -76 -76 -76 -77 -77 -77 -78 -78 -78 -80 -80 -80 -82 -82 -82 -83 -83 -83 -83 -83 -83 -83 -83 -83 -83 -83 -83 -84 -84 -84 -84 -84 -84 -84 -84 -84 -84 -84 -84 -85 -85 -85 -86 -86 -86 -87 -87 -87 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -92 -92 -92 -95 -95 -95 -95 -95 -95 -95 -95 -95 -95 -95 -95 -96 -96 -96 -97 -97 -97 -97 -97 -97 -97 -97 -97 -97 -97 -97 -98 -98 -98 -98 -98 -98 -98 -98 -98 -98 -98 -98 -100 -100 -100 -100 -100 -100 -100 -100 -100 -100 -100 -100 -103 -103 -103 -103 -103 -103 -103 -103 -103 -103 -103 -103 -104 -104 -104 -104 -104 -104 -104 -104 -104 -104 -104 -104 -105 -105 -105 -111 -111 -111 -113 -113 -113 -113 -113 -113 -113 -113 -113 -113 -113 -113 -114 -114 -114 -116 -116 -116 -118 -118 -118 -118 -118 -118 -118 -118 -118 -118 -118 -118 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -120 
-120 -120 -120 -120 -120 -120 -120 -120 -120 -120 -120 -125 -125 -125 -125 -125 -125 -125 -125 -125 -125 -125 -125 -126 -126 -126 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -129 -129 -129 -129 -129 -129 -129 -129 -129 -129 -129 -129 -131 -131 -131 -133 -133 -133 -134 -134 -134 -134 -134 -134 -134 -134 -134 -134 -134 -134 -136 -136 -136 -137 -137 -137 -137 -137 -137 -137 -137 -137 -137 -137 -137 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -143 -143 -143 -145 -145 -145 -146 -146 -146 -146 -146 -146 -146 -146 -146 -146 -146 -146 -149 -149 -149 -149 -149 -149 -149 -149 -149 -149 -149 -149 -150 -150 -150 -152 -152 -152 -152 -152 -152 -152 -152 -152 -152 -152 -152 -153 -153 -153 -155 -155 -155 -156 -156 -156 -157 -157 -157 -158 -158 -158 -160 -160 -160 -162 -162 -162 -163 -163 -163 -164 -164 -164 -164 -164 -164 -164 -164 -164 -164 -164 -164 -165 -165 -165 -165 -165 -165 -165 -165 -165 -165 -165 -165 -166 -166 -166 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -168 -168 -168 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -170 -170 -170 -172 -172 -172 -172 -172 -172 -172 -172 -172 -172 -172 -172 -174 -174 -174 -174 -174 -174 -174 -174 -174 -174 -174 -174 -175 -175 -175 -175 -175 -175 -175 -175 -175 -175 -175 -175 -176 -176 -176 -176 -176 -176 -176 -176 -176 -176 -176 -176 -177 -177 -177 -178 -178 -178 -179 -179 -179 -179 -179 -179 -179 -179 -179 -179 -179 -179 -180 -180 -180 -181 -181 -181 -183 -183 -183 -186 -186 -186 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -189 -189 -189 -190 -190 -190 -191 -191 -191 -191 -191 -191 -191 -191 -191 -191 -191 -191 -192 -192 -192 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -194 -194 -194 -195 -195 -195 -195 -195 -195 -195 -195 -195 -195 -195 -195 -196 -196 -196 -197 -197 -197 -197 -197 -197 -197 -197 -197 -197 -197 -197 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -200 -200 -200 -200 -200 -200 -200 -200 -200 -200 -200 -200 -201 -201 -201 -202 -202 -202 -203 -203 -203 -203 -203 -203 -203 -203 -203 -203 -203 -203 -205 -205 -205 -205 -205 -205 -205 -205 -205 -205 -205 -205 -207 -207 -207 -207 -207 -207 -207 -207 -207 -207 -207 -207 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -209 -209 -209 -209 -209 -209 -209 -209 -209 -209 -209 -209 -213 -213 -213 -213 -213 -213 -213 -213 -213 -213 -213 -213 -214 -214 -214 -216 -216 -216 -216 -216 -216 -216 -216 -216 -216 -216 -216 -217 -217 -217 -217 -217 -217 -217 -217 -217 -217 -217 -217 -218 -218 -218 -219 -219 -219 -219 -219 -219 -219 -219 -219 -219 -219 -219 -221 -221 -221 -221 -221 -221 -221 -221 -221 -221 -221 -221 -222 -222 -222 -223 -223 -223 -223 -223 -223 -223 -223 -223 -223 -223 -223 -224 -224 -224 -224 
-224 -224 -224 -224 -224 -224 -224 -224 -226 -226 -226 -228 -228 -228 -229 -229 -229 -229 -229 -229 -229 -229 -229 -229 -229 -229 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -233 -233 -233 -233 -233 -233 -233 -233 -233 -233 -233 -233 -235 -235 -235 -237 -237 -237 -237 -237 -237 -237 -237 -237 -237 -237 -237 -238 -238 -238 -238 -238 -238 -238 -238 -238 -238 -238 -238 -239 -239 -239 -239 -239 -239 -239 -239 -239 -239 -239 -239 -241 -241 -241 -242 -242 -242 -242 -242 -242 -242 -242 -242 -242 -242 -242 -244 -244 -244 -247 -247 -247 -248 -248 -248 -249 -249 -249 -252 -252 -252 -255 -255 -255 -255 -255 -255 -255 -255 -255 -255 -255 -255 -256 -256 -256 -256 -256 -256 -256 -256 -256 -256 -256 -256 -257 -257 -257 -258 -258 -258 -260 -260 -260 -262 -262 -262 -263 -263 -263 -265 -265 -265 -265 -265 -265 -265 -265 -265 -265 -265 -265 -266 -266 -266 -272 -272 -272 -272 -272 -272 -272 -272 -272 -272 -272 -272 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -274 -274 -274 -275 -275 -275 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -278 -278 -278 -278 -278 -278 -278 -278 -278 -278 -278 -278 -280 -280 -280 -280 -280 -280 -280 -280 -280 -280 -280 -280 -281 -281 -281 -281 -281 -281 -281 -281 -281 -281 -281 -281 -282 -282 -282 -282 -282 -282 -282 -282 -282 -282 -282 -282 -283 -283 -283 -284 -284 -284 -285 -285 -285 -286 -286 -286 -287 -287 -287 -288 -288 -288 -288 -288 -288 -288 -288 -288 -288 -288 -288 -289 -289 -289 -291 -291 -291 -292 -292 -292 -296 -296 -296 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -302 -302 -302 -305 -305 -305 -306 -306 -306 -307 -307 -307 -307 -307 -307 -307 -307 -307 -307 -307 -307 -308 -308 -308 -309 -309 -309 -309 -309 -309 -309 -309 -309 -309 -309 -309 -310 -310 -310 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -315 -315 -315 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -317 -317 -317 -317 -317 -317 -317 -317 -317 -317 -317 -317 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -321 -321 -321 -321 -321 -321 -321 -321 -321 -321 -321 -321 -322 -322 -322 -322 -322 -322 -322 -322 -322 -322 -322 -322 -323 -323 -323 -325 -325 -325 -325 -325 -325 -325 -325 -325 -325 -325 -325 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -331 -331 -331 -331 -331 -331 -331 -331 -331 -331 -331 -331 -332 -332 -332 -333 -333 -333 -333 -333 -333 -333 -333 -333 -333 -333 -333 -335 -335 -335 -336 -336 -336 -338 -338 -338 -339 -339 -339 -341 -341 -341 -342 -342 -342 -342 -342 -342 -342 -342 -342 -342 -342 -342 -344 -344 -344 -344 -344 -344 -344 -344 -344 -344 -344 -344 -345 
-345 -345 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -351 -351 -351 -353 -353 -353 -353 -353 -353 -353 -353 -353 -353 -353 -353 -356 -356 -356 -360 -360 -360 -362 -362 -362 -364 -364 -364 -365 -365 -365 -366 -366 -366 -367 -367 -367 -367 -367 -367 -367 -367 -367 -367 -367 -367 -368 -368 -368 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -373 -373 -373 -374 -374 -374 -375 -375 -375 -377 -377 -377 -378 -378 -378 -379 -379 -379 -382 -382 -382 -382 -382 -382 -382 -382 -382 -382 -382 -382 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -386 -386 -386 -389 -389 -389 -392 -392 -392 -393 -393 -393 -394 -394 -394 -395 -395 -395 -395 -395 -395 -395 -395 -395 -395 -395 -395 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -397 -397 -397 -397 -397 -397 -397 -397 -397 -397 -397 -397 -399 -399 -399 -399 -399 -399 -399 -399 -399 -399 -399 -399 -400 -400 -400 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -402 -402 -402 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -404 -404 -404 -404 -404 -404 -404 -404 -404 -404 -404 -404 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -407 -407 -407 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -411 -411 -411 -413 -413 -413 -413 -413 -413 -413 -413 -413 -413 -413 -413 -414 -414 -414 -414 -414 -414 -414 -414 -414 -414 -414 -414 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -418 -418 -418 -419 -419 -419 -421 -421 -421 -424 -424 -424 -424 -424 -424 -424 -424 -424 -424 -424 -424 -427 -427 -427 -429 -429 -429 -429 -429 -429 -429 -429 -429 -429 -429 -429 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -432 -432 -432 -435 -435 -435 -436 -436 -436 -437 -437 -437 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -439 -439 -439 -439 -439 -439 -439 -439 -439 -439 -439 -439 -443 -443 -443 -444 -444 -444 -446 -446 -446 -448 -448 -448 -449 -449 -449 -452 -452 -452 -453 -453 -453 -454 -454 -454 -454 -454 -454 -454 -454 -454 -454 
-454 -454 -454 -454 -454 -454 -454 -454 -454 -454 -454 -454 -454 -454 -454 -454 -454 -455 -455 -455 -457 -457 -457 -458 -458 -458 -458 -458 -458 -458 -458 -458 -458 -458 -458 -459 -459 -459 -459 -459 -459 -459 -459 -459 -459 -459 -459 -460 -460 -460 -462 -462 -462 -462 -462 -462 -462 -462 -462 -462 -462 -462 -463 -463 -463 -463 -463 -463 -463 -463 -463 -463 -463 -463 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -467 -467 -467 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -470 -470 -470 -472 -472 -472 -475 -475 -475 -477 -477 -477 -478 -478 -478 -478 -478 -478 -478 -478 -478 -478 -478 -478 -479 -479 -479 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -481 -481 -481 -482 -482 -482 -483 -483 -483 -484 -484 -484 -485 -485 -485 -487 -487 -487 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -490 -490 -490 -491 -491 -491 -492 -492 -492 -492 -492 -492 -492 -492 -492 -492 -492 -492 -493 -493 -493 -494 -494 -494 -495 -495 -495 -496 -496 -496 -497 -497 -497 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -PREHOOK: query: -- sample two tables -explain select * from ( -select t1.key as k1, t2.key as k from ss_src1 tablesample(80 percent) t1 full outer join ss_src2 tablesample(2 percent) t2 on t1.key=t2.key -) subq where k in (199, 10199, 20199) or k1 in (199, 10199, 20199) -PREHOOK: type: QUERY -POSTHOOK: query: -- sample two tables -explain select * from ( -select t1.key as k1, t2.key as k from ss_src1 tablesample(80 percent) t1 full outer join ss_src2 tablesample(2 percent) t2 on t1.key=t2.key -) subq where k in (199, 10199, 20199) or k1 in (199, 10199, 20199) -POSTHOOK: type: QUERY -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value 
SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_FULLOUTERJOIN (TOK_TABREF (TOK_TABNAME ss_src1) (TOK_TABLESPLITSAMPLE TOK_PERCENT 80) t1) (TOK_TABREF (TOK_TABNAME ss_src2) (TOK_TABLESPLITSAMPLE TOK_PERCENT 2) t2) (= (. (TOK_TABLE_OR_COL t1) key) (. (TOK_TABLE_OR_COL t2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL t1) key) k1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL t2) key) k)))) subq)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (or (TOK_FUNCTION in (TOK_TABLE_OR_COL k) 199 10199 20199) (TOK_FUNCTION in (TOK_TABLE_OR_COL k1) 199 10199 20199))))) - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: - subq:t1 - TableScan - alias: t1 - Reduce Output Operator - key expressions: - expr: key - type: int - sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 0 - value expressions: - expr: key - type: int - subq:t2 - TableScan - alias: t2 - Reduce Output Operator - key expressions: - expr: key - type: int - sort order: + - Map-reduce partition columns: - expr: key - type: int - tag: 1 - value expressions: - expr: key - type: int - Split Sample: - subq:t1 - percentage: 80.0 - seed number: 5 - subq:t2 - percentage: 2.0 - seed number: 5 - Reduce Operator Tree: - Join Operator - condition map: - Outer Join 0 to 1 - condition expressions: - 0 {VALUE._col0} - 1 {VALUE._col0} - handleSkewJoin: false - outputColumnNames: _col0, _col4 - Filter Operator - predicate: - expr: ((_col4) IN (199, 10199, 20199) or (_col0) IN (199, 10199, 20199)) - type: boolean - Select Operator - expressions: - expr: _col0 - type: int - expr: _col4 - type: int - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - Stage: Stage-0 - Fetch Operator - limit: -1 - - -PREHOOK: query: select * from ( -select t1.key as k1, t2.key as k from ss_src1 tablesample(80 percent) t1 full outer join ss_src2 tablesample(2 percent) t2 on t1.key=t2.key -) subq where k in (199, 10199, 20199) or k1 in (199, 10199, 20199) -PREHOOK: type: QUERY -PREHOOK: Input: default@ss_src1 -PREHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: query: select * from ( -select t1.key as k1, t2.key as k from ss_src1 tablesample(80 percent) t1 full outer join ss_src2 tablesample(2 percent) t2 on t1.key=t2.key -) subq where k in (199, 10199, 20199) or k1 in (199, 10199, 20199) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ss_src1 -POSTHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, 
comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -PREHOOK: query: -- shrink last split -explain select count(1) from ss_src2 tablesample(1 percent) -PREHOOK: type: QUERY -POSTHOOK: query: -- shrink last split -explain select count(1) from ss_src2 tablesample(1 percent) -POSTHOOK: type: QUERY -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME ss_src2) (TOK_TABLESPLITSAMPLE TOK_PERCENT 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) 
(TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))))) - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: - ss_src2 - TableScan - alias: ss_src2 - Select Operator - Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Split Sample: - ss_src2 - percentage: 1.0 - seed number: 5 - Reduce Operator Tree: - Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - mode: mergepartial - outputColumnNames: _col0 - Select Operator - expressions: - expr: _col0 - type: bigint - outputColumnNames: _col0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - Stage: Stage-0 - Fetch Operator - limit: -1 - - -PREHOOK: query: select count(1) from ss_src2 tablesample(1 percent) -PREHOOK: type: QUERY -PREHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from ss_src2 tablesample(1 percent) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -500 -PREHOOK: query: select count(1) from ss_src2 tablesample(50 percent) -PREHOOK: type: QUERY -PREHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from ss_src2 tablesample(50 percent) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, 
type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -1000 -PREHOOK: query: --HIVE-3401 more split samplings - --- total length -explain -select count(1) from ss_src2 tablesample(100B) -PREHOOK: type: QUERY -POSTHOOK: query: --HIVE-3401 more split samplings - --- total length -explain -select count(1) from ss_src2 tablesample(100B) -POSTHOOK: type: QUERY -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME ss_src2) (TOK_TABLESPLITSAMPLE TOK_LENGTH 100B))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))))) - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: - ss_src2 - TableScan - alias: ss_src2 - Select Operator - Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort 
order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Split Sample: - ss_src2 - seed number: 5 - total length: 100 - Reduce Operator Tree: - Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - mode: mergepartial - outputColumnNames: _col0 - Select Operator - expressions: - expr: _col0 - type: bigint - outputColumnNames: _col0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - Stage: Stage-0 - Fetch Operator - limit: -1 - - -PREHOOK: query: select count(1) from ss_src2 tablesample(100B) -PREHOOK: type: QUERY -PREHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from ss_src2 tablesample(100B) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -500 -PREHOOK: query: explain -select count(1) from ss_src2 tablesample(1K) -PREHOOK: type: QUERY -POSTHOOK: query: explain -select count(1) from ss_src2 tablesample(1K) -POSTHOOK: type: QUERY -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part 
PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME ss_src2) (TOK_TABLESPLITSAMPLE TOK_LENGTH 1K))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))))) - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: - ss_src2 - TableScan - alias: ss_src2 - Select Operator - Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Split Sample: - ss_src2 - seed number: 5 - total length: 1024 - Reduce Operator Tree: - Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - mode: mergepartial - outputColumnNames: _col0 - Select Operator - expressions: - expr: _col0 - type: bigint - outputColumnNames: _col0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - Stage: Stage-0 - Fetch Operator - limit: -1 - - -PREHOOK: query: select count(1) from ss_src2 tablesample(1K) -PREHOOK: type: QUERY -PREHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from ss_src2 tablesample(1K) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value 
SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -500 -PREHOOK: query: -- row per split -explain -select key, value from ss_src2 tablesample(0 ROWS) -PREHOOK: type: QUERY -POSTHOOK: query: -- row per split -explain -select key, value from ss_src2 tablesample(0 ROWS) -POSTHOOK: type: QUERY -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME ss_src2) (TOK_TABLESPLITSAMPLE TOK_ROWCOUNT 0))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))))) - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: - ss_src2 - TableScan - alias: ss_src2 - Row Limit Per Split: 0 - Select Operator - expressions: - expr: key - type: int - expr: value - type: string - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - Stage: Stage-0 - Fetch Operator - limit: -1 - - -PREHOOK: query: select key, value from ss_src2 tablesample(0 ROWS) -PREHOOK: type: QUERY -PREHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: query: select key, value from ss_src2 tablesample(0 ROWS) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key 
EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: explain -select count(1) from ss_src2 tablesample(10 ROWS) -PREHOOK: type: QUERY -POSTHOOK: query: explain -select count(1) from ss_src2 tablesample(10 ROWS) -POSTHOOK: type: QUERY -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME ss_src2) (TOK_TABLESPLITSAMPLE TOK_ROWCOUNT 10))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))))) - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: - ss_src2 - TableScan - alias: ss_src2 - Row Limit Per Split: 10 - Select Operator - Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Reduce Operator Tree: - Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - mode: mergepartial - outputColumnNames: _col0 - Select Operator - expressions: - expr: _col0 - type: bigint - 
outputColumnNames: _col0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - Stage: Stage-0 - Fetch Operator - limit: -1 - - -PREHOOK: query: select count(1) from ss_src2 tablesample(10 ROWS) -PREHOOK: type: QUERY -PREHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from ss_src2 tablesample(10 ROWS) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -10 -PREHOOK: query: explain -select count(1) from ss_src2 tablesample(100 ROWS) -PREHOOK: type: QUERY -POSTHOOK: query: explain -select count(1) from ss_src2 tablesample(100 ROWS) -POSTHOOK: type: QUERY -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), 
] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME ss_src2) (TOK_TABLESPLITSAMPLE TOK_ROWCOUNT 100))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))))) - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Alias -> Map Operator Tree: - ss_src2 - TableScan - alias: ss_src2 - Row Limit Per Split: 100 - Select Operator - Group By Operator - aggregations: - expr: count(1) - bucketGroup: false - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: _col0 - type: bigint - Reduce Operator Tree: - Group By Operator - aggregations: - expr: count(VALUE._col0) - bucketGroup: false - mode: mergepartial - outputColumnNames: _col0 - Select Operator - expressions: - expr: _col0 - type: bigint - outputColumnNames: _col0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - - Stage: Stage-0 - Fetch Operator - limit: -1 - - -PREHOOK: query: select count(1) from ss_src2 tablesample(100 ROWS) -PREHOOK: type: QUERY -PREHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: query: select count(1) from ss_src2 tablesample(100 ROWS) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -100 -PREHOOK: query: select key from ss_src2 tablesample(200B) -PREHOOK: type: QUERY -PREHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: query: select key from ss_src2 tablesample(200B) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ss_src2 -#### A masked 
pattern was here #### -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -238 -86 -311 -27 -165 -409 -255 -278 -98 -484 -265 -193 -401 -150 -273 -224 -369 -66 -PREHOOK: query: select key from ss_src2 tablesample(10 ROWS) -PREHOOK: type: QUERY -PREHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: query: select key from ss_src2 tablesample(10 ROWS) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -238 -86 -311 -27 -165 -409 -255 -278 -98 -484 -PREHOOK: query: -- ROW type works with other input formats (others, don't) -select 
count(1) from ss_src2 tablesample(10 ROWS) -PREHOOK: type: QUERY -PREHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: query: -- ROW type works with other input formats (others, don't) -select count(1) from ss_src2 tablesample(10 ROWS) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@ss_src2 -#### A masked pattern was here #### -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -10 -PREHOOK: query: --HIVE-5061 row sampling in sub-query -select * from (select * from src TABLESAMPLE (1 ROWS)) x -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: --HIVE-5061 row sampling in sub-query -select * from (select * from src TABLESAMPLE (1 ROWS)) x -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part 
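The sampling output deleted above exercises Hive's row- and byte-based TABLESAMPLE forms; note the "Row Limit Per Split: 100" in the old plan, which shows the row limit is applied per input split rather than per table. A minimal sketch of the three forms used in these tests, against a hypothetical table t:

    -- read at most 100 rows from each input split
    SELECT count(1) FROM t TABLESAMPLE (100 ROWS);
    -- read roughly the first 200 bytes of each split
    SELECT key FROM t TABLESAMPLE (200B);
    -- HIVE-5061: row sampling also applies inside a sub-query
    SELECT * FROM (SELECT * FROM t TABLESAMPLE (1 ROWS)) x;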
PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -238 val_238 diff --git a/ql/src/test/results/clientpositive/stats1.q.out b/ql/src/test/results/clientpositive/stats1.q.out index 480ba13..d317328 100644 --- a/ql/src/test/results/clientpositive/stats1.q.out +++ b/ql/src/test/results/clientpositive/stats1.q.out @@ -67,18 +67,14 @@ STAGE PLANS: TableScan Union Statistics: Num rows: 26 Data size: 463 Basic stats: COMPLETE Column stats: PARTIAL - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 7072 Basic stats: COMPLETE Column stats: PARTIAL - File Output Operator - compressed: false - Statistics: Num rows: 26 Data size: 7072 Basic stats: COMPLETE Column stats: PARTIAL - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.tmptable + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 463 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tmptable TableScan alias: s2 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -88,18 +84,14 @@ STAGE PLANS: Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Union Statistics: Num rows: 26 Data size: 463 Basic stats: COMPLETE Column stats: PARTIAL - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 7072 Basic stats: COMPLETE Column stats: PARTIAL - File Output Operator - compressed: false - Statistics: Num rows: 26 Data size: 7072 Basic stats: COMPLETE Column stats: PARTIAL - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.tmptable + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 463 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tmptable Stage: Stage-0 Move Operator diff --git a/ql/src/test/results/clientpositive/stats11.q.out b/ql/src/test/results/clientpositive/stats11.q.out index c7f1abe..427829a 100644 --- a/ql/src/test/results/clientpositive/stats11.q.out +++ b/ql/src/test/results/clientpositive/stats11.q.out @@ -374,13 +374,13 @@ TOK_QUERY STAGE DEPENDENCIES: Stage-9 is a root stage Stage-1 depends on stages: Stage-9 - Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + 
Stage-5 + Stage-6 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-9 @@ -565,10 +565,10 @@ STAGE PLANS: Truncated Path -> Alias: /srcbucket_mapjoin [a] - Stage: Stage-8 + Stage: Stage-7 Conditional Operator - Stage: Stage-5 + Stage: Stage-4 Move Operator files: hdfs directory: true @@ -596,11 +596,11 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucketmapjoin_tmp_result - Stage: Stage-3 + Stage: Stage-2 Stats-Aggr Operator #### A masked pattern was here #### - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -669,7 +669,7 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### - Stage: Stage-6 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -738,7 +738,7 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### - Stage: Stage-7 + Stage: Stage-6 Move Operator files: hdfs directory: true @@ -917,13 +917,13 @@ TOK_QUERY STAGE DEPENDENCIES: Stage-9 is a root stage Stage-1 depends on stages: Stage-9 - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 + Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 Stage-5 - Stage-6 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-3 depends on stages: Stage-0 + Stage-4 + Stage-6 + Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-9 @@ -1072,10 +1072,10 @@ STAGE PLANS: Truncated Path -> Alias: /srcbucket_mapjoin_part/ds=2008-04-08 [b] - Stage: Stage-7 + Stage: Stage-8 Conditional Operator - Stage: Stage-4 + Stage: Stage-5 Move Operator files: hdfs directory: true @@ -1108,11 +1108,11 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucketmapjoin_tmp_result - Stage: Stage-2 + Stage: Stage-3 Stats-Aggr Operator #### A masked pattern was here #### - Stage: Stage-3 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -1196,7 +1196,7 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -1280,7 +1280,7 @@ STAGE PLANS: Truncated Path -> Alias: #### A masked pattern was here #### - Stage: Stage-6 + Stage: Stage-7 Move Operator files: hdfs directory: true diff --git a/ql/src/test/results/clientpositive/subq2.q.out b/ql/src/test/results/clientpositive/subq2.q.out index e661887..d91e352 100644 --- a/ql/src/test/results/clientpositive/subq2.q.out +++ b/ql/src/test/results/clientpositive/subq2.q.out @@ -22,22 +22,18 @@ STAGE PLANS: Filter Operator predicate: (key >= 90) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Group By Operator + aggregations: count(1) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: 
string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/subq_where_serialization.q.out b/ql/src/test/results/clientpositive/subq_where_serialization.q.out index f91a7d5..d72c3be 100644 --- a/ql/src/test/results/clientpositive/subq_where_serialization.q.out +++ b/ql/src/test/results/clientpositive/subq_where_serialization.q.out @@ -3,15 +3,15 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select src.key from src where src.key in ( select distinct key from src) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-5 depends on stages: Stage-1 , consists of Stage-6, Stage-2 - Stage-6 has a backup stage: Stage-2 - Stage-4 depends on stages: Stage-6 - Stage-2 - Stage-0 depends on stages: Stage-4, Stage-2 + Stage-2 is a root stage + Stage-4 depends on stages: Stage-2 , consists of Stage-5, Stage-1 + Stage-5 has a backup stage: Stage-1 + Stage-3 depends on stages: Stage-5 + Stage-1 + Stage-0 depends on stages: Stage-3, Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -20,46 +20,38 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Group By Operator + keys: key (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: key (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) + Group By Operator + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0 Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-5 + Stage: Stage-4 Conditional Operator - Stage: Stage-6 + Stage: Stage-5 Map Reduce Local Work Alias -> Map Local Tables: $INTNAME @@ -76,7 +68,7 @@ STAGE PLANS: 0 key (type: string) 1 _col0 
(type: string) - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -105,16 +97,10 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -125,6 +111,12 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: diff --git a/ql/src/test/results/clientpositive/subquery_exists_having.q.out b/ql/src/test/results/clientpositive/subquery_exists_having.q.out index e6c1716..41d35e9 100644 --- a/ql/src/test/results/clientpositive/subquery_exists_having.q.out +++ b/ql/src/test/results/clientpositive/subquery_exists_having.q.out @@ -35,22 +35,18 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Group By Operator + aggregations: count() + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -186,22 +182,18 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Group By Operator + aggregations: count() + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: 
bigint) + value expressions: _col1 (type: bigint) TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/subquery_in.q.out b/ql/src/test/results/clientpositive/subquery_in.q.out index 90d33ad..bb0d876 100644 --- a/ql/src/test/results/clientpositive/subquery_in.q.out +++ b/ql/src/test/results/clientpositive/subquery_in.q.out @@ -23,6 +23,18 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + TableScan alias: s1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -42,18 +54,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -242,13 +242,13 @@ part where part.p_size in ) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 + Stage-2 is a root stage Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -284,7 +284,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -301,32 +301,22 @@ STAGE PLANS: Filter Operator predicate: _col0 is not null (type: boolean) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: double) + Group By Operator + keys: _col0 (type: double) + mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: double) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - TableScan alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -338,6 +328,12 @@ STAGE PLANS: Map-reduce partition columns: UDFToDouble(p_size) (type: double) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE value expressions: p_name (type: string), p_size (type: int) + TableScan + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -592,41 +588,33 @@ STAGE PLANS: Filter Operator predicate: ((key > '9') and value is not null) (type: boolean) Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Group By Operator + keys: key (type: string), value (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: key (type: string), value (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Map Reduce @@ -743,13 +731,13 @@ where li.l_linenumber = 1 and li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR') POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a 
root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -758,41 +746,39 @@ STAGE PLANS: Filter Operator predicate: l_partkey is not null (type: boolean) Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: l_partkey (type: int) - outputColumnNames: l_partkey + Group By Operator + keys: l_partkey (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: l_partkey (type: int) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + TableScan alias: li Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -804,12 +790,6 @@ STAGE PLANS: Map-reduce partition columns: l_partkey (type: int) Statistics: Num rows: 12 Data size: 1439 Basic stats: COMPLETE Column stats: NONE value expressions: l_orderkey (type: int), l_suppkey (type: int) - TableScan - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -818,7 +798,7 @@ STAGE PLANS: 0 {KEY.reducesinkkey0} 1 {VALUE._col0} {VALUE._col1} outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 27 Data size: 3298 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -826,7 +806,7 @@ STAGE PLANS: output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -834,7 +814,7 @@ STAGE PLANS: key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 27 Data size: 3298 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col3 (type: int) TableScan alias: lineitem @@ -864,14 +844,14 @@ STAGE PLANS: 0 {VALUE._col0} {VALUE._col2} 1 outputColumnNames: _col0, _col3 - Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col3 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git a/ql/src/test/results/clientpositive/subquery_in_having.q.out b/ql/src/test/results/clientpositive/subquery_in_having.q.out index 991a6e2..f5cc6bb 100644 --- a/ql/src/test/results/clientpositive/subquery_in_having.q.out +++ b/ql/src/test/results/clientpositive/subquery_in_having.q.out @@ -69,69 +69,57 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: s1 + alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > '9') (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column 
stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col1 is not null (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: bigint) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - TableScan - Reduce Output Operator key expressions: _col1 (type: bigint) sort order: + Map-reduce partition columns: _col1 (type: bigint) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) + TableScan + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -157,23 +145,22 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: src + alias: s1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key > '9') (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator @@ -181,16 +168,25 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col1 is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column 
stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: bigint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator @@ -287,22 +283,18 @@ STAGE PLANS: Filter Operator predicate: value is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Group By Operator + aggregations: count() + keys: key (type: string), value (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: key (type: string), value (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: bigint) + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -449,52 +441,36 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: part_subq + alias: b Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_mfgr is not null (type: boolean) Statistics: Num rows: 15 Data size: 1586 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_mfgr (type: string), p_size (type: int) - outputColumnNames: p_mfgr, p_size + Group By Operator + aggregations: avg(p_size) + keys: p_mfgr (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 15 Data size: 1586 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: max(p_size), min(p_size) - keys: p_mfgr (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 15 Data size: 1586 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 15 Data size: 1586 Basic stats: COMPLETE Column stats: NONE - 
value expressions: _col1 (type: int), _col2 (type: int) + value expressions: _col1 (type: struct) Reduce Operator Tree: Group By Operator - aggregations: max(VALUE._col0), min(VALUE._col1) + aggregations: avg(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 740 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col1 - _col2) < 20) (type: boolean) - Statistics: Num rows: 2 Data size: 211 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 211 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 211 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 1586 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -504,14 +480,14 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 211 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 1586 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) TableScan Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 15 Data size: 1586 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) + Statistics: Num rows: 2 Data size: 211 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -537,40 +513,48 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b + alias: part_subq Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_mfgr is not null (type: boolean) Statistics: Num rows: 15 Data size: 1586 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_mfgr (type: string), p_size (type: int) - outputColumnNames: p_mfgr, p_size + Group By Operator + aggregations: max(p_size), min(p_size) + keys: p_mfgr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 15 Data size: 1586 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: avg(p_size) - keys: p_mfgr (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 15 Data size: 1586 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 15 Data size: 1586 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct) + 
value expressions: _col1 (type: int), _col2 (type: int) Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: max(VALUE._col0), min(VALUE._col1) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 15 Data size: 1586 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 7 Data size: 740 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col1 - _col2) < 20) (type: boolean) + Statistics: Num rows: 2 Data size: 211 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 211 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 211 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator @@ -611,49 +595,41 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: part_subq + alias: b Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_mfgr is not null (type: boolean) Statistics: Num rows: 15 Data size: 1586 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_mfgr (type: string), p_size (type: int) - outputColumnNames: p_mfgr, p_size + Group By Operator + aggregations: avg(p_size) + keys: p_mfgr (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 15 Data size: 1586 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: max(p_size), min(p_size) - keys: p_mfgr (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 15 Data size: 1586 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 15 Data size: 1586 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int) + value expressions: _col1 (type: struct) TableScan - alias: b + alias: part_subq Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_mfgr is not null (type: boolean) Statistics: Num rows: 15 Data size: 1586 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_mfgr (type: string), p_size (type: int) - outputColumnNames: p_mfgr, p_size + Group By Operator + aggregations: max(p_size), min(p_size) + keys: p_mfgr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 15 Data size: 1586 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: avg(p_size) - keys: p_mfgr (type: string) - mode: hash - outputColumnNames: _col0, 
_col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 15 Data size: 1586 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 15 Data size: 1586 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct) + value expressions: _col1 (type: int), _col2 (type: int) Reduce Operator Tree: Demux Operator Statistics: Num rows: 30 Data size: 3172 Basic stats: COMPLETE Column stats: NONE @@ -831,22 +807,18 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + Group By Operator + aggregations: count() + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -921,22 +893,18 @@ STAGE PLANS: Filter Operator predicate: (key > '9') (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Group By Operator + aggregations: count() + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1017,22 +985,18 @@ STAGE PLANS: Filter Operator predicate: (key > '9') (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Group By Operator + aggregations: count() + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By 
Operator - aggregations: count() - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1193,22 +1157,18 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + Group By Operator + aggregations: count() + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: bigint) + value expressions: _col2 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -1266,35 +1226,38 @@ STAGE PLANS: TableScan alias: part_subq Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_mfgr (type: string), p_size (type: int) - sort order: ++ - Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) - Reduce Operator Tree: - Extract - Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: _wcol0 is not null (type: boolean) + predicate: p_name is not null (type: boolean) Statistics: Num rows: 8 Data size: 1692 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _wcol0 (type: string) - outputColumnNames: _col0 + expressions: p_mfgr (type: string), p_name (type: string), p_size (type: int) + outputColumnNames: p_mfgr, p_name, p_size Statistics: Num rows: 8 Data size: 1692 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: _col0 (type: string) + aggregations: avg(p_size) + keys: p_mfgr (type: string), p_name (type: string) mode: hash - outputColumnNames: _col0 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 8 Data size: 1692 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input 
format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 8 Data size: 1692 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 8 Data size: 1692 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-5 Conditional Operator @@ -1302,11 +1265,11 @@ STAGE PLANS: Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: - $INTNAME + $INTNAME1 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $INTNAME + $INTNAME1 TableScan HashTable Sink Operator condition expressions: @@ -1347,17 +1310,17 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: string) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 8 Data size: 1692 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col2 (type: double) TableScan Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: _col1 (type: string) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 8 Data size: 1692 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col2 (type: double) Reduce Operator Tree: Join Operator condition map: @@ -1385,38 +1348,35 @@ STAGE PLANS: TableScan alias: part_subq Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_size (type: int) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + Reduce Operator Tree: + Extract + Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_name is not null (type: boolean) + predicate: _wcol0 is not null (type: boolean) Statistics: Num rows: 8 Data size: 1692 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: p_mfgr (type: string), p_name (type: string), p_size (type: int) - outputColumnNames: p_mfgr, p_name, p_size + expressions: _wcol0 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 8 Data size: 1692 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(p_size) - keys: p_mfgr (type: string), p_name (type: string) + keys: _col0 (type: string) mode: hash - outputColumnNames: _col0, _col1, _col2 + 
outputColumnNames: _col0 Statistics: Num rows: 8 Data size: 1692 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 8 Data size: 1692 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: struct) - Reduce Operator Tree: - Group By Operator - aggregations: avg(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 8 Data size: 1692 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/subquery_multiinsert.q.out b/ql/src/test/results/clientpositive/subquery_multiinsert.q.out index b678fda..062881f 100644 --- a/ql/src/test/results/clientpositive/subquery_multiinsert.q.out +++ b/ql/src/test/results/clientpositive/subquery_multiinsert.q.out @@ -276,18 +276,14 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_4 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_4 Stage: Stage-0 Move Operator @@ -681,18 +677,14 @@ STAGE PLANS: 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_4 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_4 Local Work: 
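The change repeated throughout these .q.out hunks is one plan simplification: identity Select Operators, which only forwarded their input columns unchanged, no longer appear between a Filter or Join and the downstream Group By or File Output Operator, and stage numbers and row statistics shift accordingly. A query of roughly this shape (a sketch using the standard src test table, not the exact test query) produces the kind of plan affected:

    EXPLAIN
    SELECT key, count(1)
    FROM src
    WHERE key >= 90
    GROUP BY key;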
Map Reduce Local Work diff --git a/ql/src/test/results/clientpositive/subquery_notexists.q.out b/ql/src/test/results/clientpositive/subquery_notexists.q.out index b31a75f..21a7262 100644 --- a/ql/src/test/results/clientpositive/subquery_notexists.q.out +++ b/ql/src/test/results/clientpositive/subquery_notexists.q.out @@ -253,20 +253,16 @@ STAGE PLANS: Filter Operator predicate: (value > 'val_2') (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Group By Operator + keys: key (type: string), value (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: key (type: string), value (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) diff --git a/ql/src/test/results/clientpositive/subquery_notexists_having.q.out b/ql/src/test/results/clientpositive/subquery_notexists_having.q.out index f377e66..7e45cd9 100644 --- a/ql/src/test/results/clientpositive/subquery_notexists_having.q.out +++ b/ql/src/test/results/clientpositive/subquery_notexists_having.q.out @@ -260,20 +260,16 @@ STAGE PLANS: Filter Operator predicate: (value > 'val_12') (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Group By Operator + keys: key (type: string), value (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: key (type: string), value (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) diff --git a/ql/src/test/results/clientpositive/subquery_notin.q.out b/ql/src/test/results/clientpositive/subquery_notin.q.out index bf133ce..9514521 100644 --- a/ql/src/test/results/clientpositive/subquery_notin.q.out +++ b/ql/src/test/results/clientpositive/subquery_notin.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join JOIN[18][tables = [src, sq_1_notin_nullcheck]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[18][tables = [src, 
sq_1_notin_nullcheck]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: -- non agg, non corr explain select * @@ -18,13 +18,13 @@ where src.key not in ) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -69,20 +69,20 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: string), value (type: string) + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -99,7 +99,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -154,7 +154,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[18][tables = [src, sq_1_notin_nullcheck]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[18][tables = [src, sq_1_notin_nullcheck]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: select * from src where src.key not in ( select key from src s1 where s1.key > '2') @@ -531,7 +531,7 @@ Manufacturer#4 almond azure aquamarine papaya violet 12 Manufacturer#5 almond antique blue firebrick mint 31 Manufacturer#5 almond aquamarine dodger light gainsboro 46 Manufacturer#5 almond azure blanched chiffon midnight 23 -Warning: Shuffle Join JOIN[34][tables = [part, sq_1_notin_nullcheck]] in Stage 'Stage-6:MAPRED' is a cross product +Warning: Shuffle Join JOIN[34][tables = [part, sq_1_notin_nullcheck]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: -- agg, non corr explain select p_name, p_size @@ -553,16 +553,16 @@ part where part.p_size not in ) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-2, Stage-6 - Stage-4 is a root stage - Stage-5 depends on stages: Stage-4 + Stage-3 is a root stage + Stage-4 depends on stages: Stage-3 + Stage-2 depends on stages: Stage-1, Stage-4 + Stage-5 is a root stage Stage-6 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -598,7 +598,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -612,33 +612,29 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: 
NONE - Select Operator - expressions: _col0 (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - TableScan - Reduce Output Operator key expressions: UDFToDouble(_col5) (type: double) sort order: + Map-reduce partition columns: UDFToDouble(_col5) (type: double) Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col5 (type: int) + TableScan + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -663,7 +659,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -699,7 +695,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -742,20 +738,20 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-6 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - TableScan alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: p_name (type: string), p_size (type: int) + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -778,7 +774,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[34][tables = [part, sq_1_notin_nullcheck]] in Stage 'Stage-7:MAPRED' is a cross product +Warning: Shuffle Join JOIN[34][tables = [part, sq_1_notin_nullcheck]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: select p_name, p_size from part where part.p_size not in @@ -1176,7 +1172,7 @@ POSTHOOK: Input: default@lineitem 139636 1 175839 1 182052 1 -Warning: Shuffle Join JOIN[18][tables = [src, sq_1_notin_nullcheck]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[18][tables = [src, sq_1_notin_nullcheck]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: -- 
alternate not in syntax select * from src @@ -1340,7 +1336,7 @@ POSTHOOK: Input: default@src POSTHOOK: Input: default@t1_v POSTHOOK: Output: database:default POSTHOOK: Output: default@T2_v -Warning: Shuffle Join JOIN[24][tables = [t1_v, sq_1_notin_nullcheck]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[24][tables = [t1_v, sq_1_notin_nullcheck]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain select * from T1_v where T1_v.key not in (select T2_v.key from T2_v) @@ -1350,13 +1346,13 @@ select * from T1_v where T1_v.key not in (select T2_v.key from T2_v) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -1408,14 +1404,10 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -1429,6 +1421,10 @@ STAGE PLANS: sort order: Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -1445,7 +1441,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -1499,7 +1495,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[24][tables = [t1_v, sq_1_notin_nullcheck]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[24][tables = [t1_v, sq_1_notin_nullcheck]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: select * from T1_v where T1_v.key not in (select T2_v.key from T2_v) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/subquery_notin_having.q.out b/ql/src/test/results/clientpositive/subquery_notin_having.q.out index 7be5060..999c7f9 100644 --- a/ql/src/test/results/clientpositive/subquery_notin_having.q.out +++ b/ql/src/test/results/clientpositive/subquery_notin_having.q.out @@ -31,46 +31,37 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: s1 + alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > '12') and key is null) (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Select Operator + 
expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 = 0) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: 0 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - keys: _col0 (type: bigint) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -78,12 +69,12 @@ STAGE PLANS: TableScan Reduce Output Operator sort order: - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) TableScan Reduce Output Operator sort order: - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: bigint) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -153,37 +144,46 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: src + alias: s1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + Filter Operator + predicate: ((key > '12') and key is null) (type: boolean) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select 
Operator + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 = 0) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: 0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col0 (type: bigint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator @@ -507,11 +507,11 @@ having b.p_mfgr not in POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-5 - Stage-3 depends on stages: Stage-2, Stage-6 + Stage-2 depends on stages: Stage-1, Stage-6 + Stage-3 depends on stages: Stage-2, Stage-4 Stage-4 is a root stage - Stage-5 depends on stages: Stage-4 - Stage-6 is a root stage + Stage-5 is a root stage + Stage-6 depends on stages: Stage-5 Stage-0 depends on stages: Stage-3 STAGE PLANS: @@ -625,6 +625,49 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_mfgr (type: string), p_retailprice (type: double) + outputColumnNames: p_mfgr, p_retailprice + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: max(p_retailprice), min(p_retailprice) + keys: p_mfgr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col2 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0), min(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col1 - _col2) > 600) (type: boolean) + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: 
NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_mfgr is null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE @@ -668,7 +711,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -701,49 +744,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-6 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_mfgr (type: string), p_retailprice (type: double) - outputColumnNames: p_mfgr, p_retailprice - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: max(p_retailprice), min(p_retailprice) - keys: p_mfgr (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: double) - Reduce Operator Tree: - Group By Operator - aggregations: max(VALUE._col0), min(VALUE._col1) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col1 - _col2) > 600) (type: boolean) - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-0 Fetch Operator limit: -1 diff --git a/ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out b/ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out index 52cbb09..bc382e9 100644 --- a/ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out +++ b/ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out @@ -49,6 +49,17 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan + alias: src11 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: (key1 is not null and value1 is not null) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: 
NONE Column stats: NONE + Reduce Output Operator + key expressions: key1 (type: string), value1 (type: string) + sort order: ++ + Map-reduce partition columns: key1 (type: string), value1 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -68,17 +79,6 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - TableScan - alias: src11 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Filter Operator - predicate: (key1 is not null and value1 is not null) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key1 (type: string), value1 (type: string) - sort order: ++ - Map-reduce partition columns: key1 (type: string), value1 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -195,13 +195,13 @@ from part b where b.p_size in ) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 + Stage-2 is a root stage Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -238,7 +238,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -274,16 +274,10 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - TableScan alias: b Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -295,6 +289,12 @@ STAGE PLANS: Map-reduce partition columns: p_size (type: int), p_mfgr (type: string) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE value expressions: p_name (type: string) + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -501,41 +501,33 @@ STAGE PLANS: Filter Operator predicate: ((key > '9') and value is not null) (type: boolean) Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Group By Operator + keys: key (type: string), value (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: key (type: string), value (type: 
string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Map Reduce @@ -614,22 +606,18 @@ STAGE PLANS: Filter Operator predicate: value is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Group By Operator + aggregations: count() + keys: key (type: string), value (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: key (type: string), value (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: bigint) + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/subquery_views.q.out b/ql/src/test/results/clientpositive/subquery_views.q.out index c928db4..0a64c69 100644 --- a/ql/src/test/results/clientpositive/subquery_views.q.out +++ b/ql/src/test/results/clientpositive/subquery_views.q.out @@ -69,8 +69,8 @@ POSTHOOK: type: CREATEVIEW POSTHOOK: Input: default@src POSTHOOK: Output: 
database:default POSTHOOK: Output: default@cv2 -Warning: Shuffle Join JOIN[42][tables = [b, sq_1_notin_nullcheck]] in Stage 'Stage-5:MAPRED' is a cross product Warning: Shuffle Join JOIN[18][tables = [b, sq_1_notin_nullcheck]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[42][tables = [b, sq_1_notin_nullcheck]] in Stage 'Stage-6:MAPRED' is a cross product PREHOOK: query: explain select * from cv2 where cv2.key in (select key from cv2 c where c.key < '11') @@ -80,17 +80,17 @@ select * from cv2 where cv2.key in (select key from cv2 c where c.key < '11') POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-5 depends on stages: Stage-4 - Stage-6 depends on stages: Stage-5 - Stage-3 depends on stages: Stage-2, Stage-6 - Stage-9 is a root stage - Stage-1 depends on stages: Stage-9 + Stage-5 is a root stage + Stage-1 depends on stages: Stage-5 Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2, Stage-7 + Stage-9 is a root stage + Stage-6 depends on stages: Stage-9 + Stage-7 depends on stages: Stage-6 Stage-0 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -135,23 +135,23 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-5 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key < '11') (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: string), value (type: string) + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -160,7 +160,7 @@ STAGE PLANS: 0 {VALUE._col0} {VALUE._col1} 1 outputColumnNames: _col0, _col1 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -168,7 +168,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-6 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -176,49 +176,44 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string), _col0 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col0 (type: string) - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((value > 'val_11') and (key < '11')) and key is not null) (type: boolean) - 
Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + predicate: ((value > 'val_11') and key is not null) (type: boolean) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string), key (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) - Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: Left Outer Join0 to 1 condition expressions: - 0 {KEY.reducesinkkey0} + 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} 1 {KEY.reducesinkkey0} - outputColumnNames: _col0, _col5 - Statistics: Num rows: 200 Data size: 2132 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col5 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col5 is null (type: boolean) - Statistics: Num rows: 100 Data size: 1066 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 151 Data size: 1606 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 1066 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 1066 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 151 Data size: 1606 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Map Reduce @@ -302,18 +297,18 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-1 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + predicate: (key < '11') (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: string), value (type: string) TableScan Reduce Output Operator @@ -327,7 
+322,7 @@ STAGE PLANS: 0 {VALUE._col0} {VALUE._col1} 1 outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -335,7 +330,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -343,44 +338,49 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string), _col0 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col0 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((value > 'val_11') and key is not null) (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + predicate: (((value > 'val_11') and (key < '11')) and key is not null) (type: boolean) + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string), key (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: Left Outer Join0 to 1 condition expressions: - 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + 0 {KEY.reducesinkkey0} 1 {KEY.reducesinkkey0} - outputColumnNames: _col0, _col1, _col5 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col5 + Statistics: Num rows: 200 Data size: 2132 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col5 is null (type: boolean) - Statistics: Num rows: 151 Data size: 1606 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 1066 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 151 Data size: 1606 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 100 Data size: 1066 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 100 Data size: 1066 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + 
input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator @@ -388,8 +388,8 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[42][tables = [b, sq_1_notin_nullcheck]] in Stage 'Stage-5:MAPRED' is a cross product Warning: Shuffle Join JOIN[18][tables = [b, sq_1_notin_nullcheck]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[42][tables = [b, sq_1_notin_nullcheck]] in Stage 'Stage-6:MAPRED' is a cross product PREHOOK: query: select * from cv2 where cv2.key in (select key from cv2 c where c.key < '11') PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/table_access_keys_stats.q.out b/ql/src/test/results/clientpositive/table_access_keys_stats.q.out index fbe392e..17c57ac 100644 --- a/ql/src/test/results/clientpositive/table_access_keys_stats.q.out +++ b/ql/src/test/results/clientpositive/table_access_keys_stats.q.out @@ -199,11 +199,11 @@ PREHOOK: Input: default@t1 #### A masked pattern was here #### Operator:GBY_2 Table:default@t1 -Keys:key,val +Keys:key Operator:GBY_8 Table:default@t1 -Keys:key +Keys:key,val 1 1 1 11 1 2 1 2 12 1 diff --git a/ql/src/test/results/clientpositive/temp_table.q.out b/ql/src/test/results/clientpositive/temp_table.q.out index 4c2d3de..7470384 100644 --- a/ql/src/test/results/clientpositive/temp_table.q.out +++ b/ql/src/test/results/clientpositive/temp_table.q.out @@ -283,41 +283,33 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: bar - Statistics: Num rows: 253 Data size: 2703 Basic stats: COMPLETE Column stats: NONE + alias: foo + Statistics: Num rows: 247 Data size: 2609 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 253 Data size: 2703 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 247 Data size: 2609 Basic stats: COMPLETE Column stats: NONE Union Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + value expressions: _col1 (type: string) TableScan - alias: foo - Statistics: Num rows: 247 Data size: 2609 Basic stats: COMPLETE Column stats: NONE + alias: bar + Statistics: Num rows: 253 Data size: 2703 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 247 Data size: 2609 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 253 Data size: 2703 Basic stats: COMPLETE Column stats: NONE Union Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce 
Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + value expressions: _col1 (type: string) Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) diff --git a/ql/src/test/results/clientpositive/temp_table_join1.q.out b/ql/src/test/results/clientpositive/temp_table_join1.q.out index dfb227e..73aa9fd 100644 --- a/ql/src/test/results/clientpositive/temp_table_join1.q.out +++ b/ql/src/test/results/clientpositive/temp_table_join1.q.out @@ -41,7 +41,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -51,9 +51,8 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) TableScan - alias: src1 + alias: src2 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -63,6 +62,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -129,7 +129,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -139,9 +139,8 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) TableScan - alias: src1 + alias: src2 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -151,6 +150,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -219,7 +219,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -229,9 +229,8 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) TableScan - alias: src1 + alias: src2 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -241,6 +240,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Reduce Operator Tree: Join Operator condition map: diff --git a/ql/src/test/results/clientpositive/tez/auto_join0.q.out b/ql/src/test/results/clientpositive/tez/auto_join0.q.out index 58201ad..e61d05e 100644 --- 
a/ql/src/test/results/clientpositive/tez/auto_join0.q.out +++ b/ql/src/test/results/clientpositive/tez/auto_join0.q.out @@ -59,14 +59,10 @@ STAGE PLANS: input vertices: 1 Map 4 Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + sort order: ++++ Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - sort order: ++++ - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE Map 4 Map Operator Tree: TableScan diff --git a/ql/src/test/results/clientpositive/tez/auto_join1.q.out b/ql/src/test/results/clientpositive/tez/auto_join1.q.out index 85708d7..e2fb443 100644 --- a/ql/src/test/results/clientpositive/tez/auto_join1.q.out +++ b/ql/src/test/results/clientpositive/tez/auto_join1.q.out @@ -24,26 +24,12 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) + Map 1 <- Map 2 (BROADCAST_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: src2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 2 - Map Operator Tree: - TableScan alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -60,7 +46,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col6 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), _col6 (type: string) @@ -74,6 +60,20 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Map 2 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_1.q.out b/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_1.q.out index 2b4a68d..5a0f79e 100644 --- a/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_1.q.out +++ b/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_1.q.out @@ -149,47 +149,27 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - 
Reducer 2 <- Map 1 (SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Estimated key counts: Map 3 => 1 - keys: - 0 key (type: string) - 1 key (type: string) - input vertices: - 0 Map 3 - Position of Big Table: 1 - Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -202,22 +182,22 @@ STAGE PLANS: ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true - bucket_count 4 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big - numFiles 4 + name default.bucket_small + numFiles 2 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -225,29 +205,69 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big + name default.bucket_small partition_columns ds partition_columns.types string - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_big - name: default.bucket_big + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [a] + Map 2 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + 
Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Estimated key counts: Map 1 => 1 + keys: + 0 key (type: string) + 1 key (type: string) + input vertices: + 0 Map 1 + Position of Big Table: 1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 + base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-09 + ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true bucket_count 4 @@ -289,54 +309,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [b] - /bucket_big/ds=2008-04-09 [b] - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 + base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-08 + ds 2008-04-09 properties: COLUMN_STATS_ACCURATE true - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small - numFiles 2 + name default.bucket_big + numFiles 4 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 114 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -344,25 +341,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small + name default.bucket_big partition_columns ds partition_columns.types string - 
serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small + name: default.bucket_big + name: default.bucket_big Truncated Path -> Alias: - /bucket_small/ds=2008-04-08 [a] - Reducer 2 + /bucket_big/ds=2008-04-08 [b] + /bucket_big/ds=2008-04-09 [b] + Reducer 3 Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -463,27 +461,45 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: true + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Estimated key counts: Map 3 => 1 + keys: + 0 key (type: string) + 1 key (type: string) + input vertices: + 1 Map 3 + Position of Big Table: 0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -496,22 +512,22 @@ STAGE PLANS: ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small - numFiles 2 + name default.bucket_big + numFiles 4 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 114 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -519,71 +535,29 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small + name default.bucket_big partition_columns ds partition_columns.types string - serialization.ddl 
struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small - Truncated Path -> Alias: - /bucket_small/ds=2008-04-08 [b] - Map 2 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Estimated key counts: Map 1 => 1 - keys: - 0 key (type: string) - 1 key (type: string) - input vertices: - 1 Map 1 - Position of Big Table: 0 - Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: + name: default.bucket_big + name: default.bucket_big #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 + base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-08 + ds 2008-04-09 properties: COLUMN_STATS_ACCURATE true bucket_count 4 @@ -625,31 +599,54 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big + Truncated Path -> Alias: + /bucket_big/ds=2008-04-08 [a] + /bucket_big/ds=2008-04-09 [a] + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 + base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-09 + ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true - bucket_count 4 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big - numFiles 4 + name default.bucket_small + numFiles 2 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct 
bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -657,26 +654,25 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big + name default.bucket_small partition_columns ds partition_columns.types string - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_big - name: default.bucket_big + name: default.bucket_small + name: default.bucket_small Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [a] - /bucket_big/ds=2008-04-09 [a] - Reducer 3 + /bucket_small/ds=2008-04-08 [b] + Reducer 2 Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -777,27 +773,45 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: true + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Estimated key counts: Map 3 => 1 + keys: + 0 key (type: string) + 1 key (type: string) + input vertices: + 1 Map 3 + Position of Big Table: 0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -810,22 +824,22 @@ STAGE PLANS: ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small - numFiles 2 + name default.bucket_big + numFiles 4 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_small { string 
key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 114 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -833,71 +847,29 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small + name default.bucket_big partition_columns ds partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small - Truncated Path -> Alias: - /bucket_small/ds=2008-04-08 [b] - Map 2 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Estimated key counts: Map 1 => 1 - keys: - 0 key (type: string) - 1 key (type: string) - input vertices: - 1 Map 1 - Position of Big Table: 0 - Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: + name: default.bucket_big + name: default.bucket_big #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 + base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-08 + ds 2008-04-09 properties: COLUMN_STATS_ACCURATE true bucket_count 4 @@ -939,31 +911,54 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big + Truncated Path -> Alias: + /bucket_big/ds=2008-04-08 [a] + /bucket_big/ds=2008-04-09 [a] + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: 
+#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 + base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-09 + ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true - bucket_count 4 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big - numFiles 4 + name default.bucket_small + numFiles 2 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -971,26 +966,25 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big + name default.bucket_small partition_columns ds partition_columns.types string - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_big - name: default.bucket_big + name: default.bucket_small + name: default.bucket_small Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [a] - /bucket_big/ds=2008-04-09 [a] - Reducer 3 + /bucket_small/ds=2008-04-08 [b] + Reducer 2 Needs Tagging: false Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_10.q.out b/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_10.q.out index 3872291..eeaac3b 100644 --- a/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_10.q.out +++ b/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_10.q.out @@ -100,14 +100,13 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 5 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Map 4 Map Operator Tree: TableScan @@ -131,14 +130,13 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 5 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Map 5 Map Operator Tree: TableScan @@ -254,23 +252,19 @@ STAGE PLANS: Filter Operator predicate: (key < 6) (type: boolean) Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key 
(type: int) - outputColumnNames: key + Group By Operator + aggregations: count() + bucketGroup: true + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - bucketGroup: true - keys: key (type: int) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Map 3 Map Operator Tree: TableScan @@ -295,17 +289,15 @@ STAGE PLANS: input vertices: 0 Reducer 2 Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_11.q.out b/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_11.q.out index 6658313..8ef9727 100644 --- a/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_11.q.out +++ b/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_11.q.out @@ -145,47 +145,27 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Estimated key counts: Map 3 => 1 - keys: - 0 key (type: string) - 1 key (type: string) - input vertices: - 0 Map 3 - Position of Big Table: 1 - Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + 
Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -198,51 +178,91 @@ STAGE PLANS: ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true - bucket_count 4 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big - numFiles 4 + name default.bucket_small + numFiles 2 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - bucket_count 4 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big + name default.bucket_small partition_columns ds partition_columns.types string - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_big - name: default.bucket_big + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [a] + Map 2 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Estimated key counts: Map 1 => 1 + keys: + 0 key (type: string) + 1 key (type: string) + input vertices: + 0 Map 1 + Position of Big Table: 1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 + base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-09 + ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true bucket_count 4 @@ -283,79 +303,57 @@ STAGE PLANS: serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [b] - /bucket_big/ds=2008-04-09 [b] - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 + base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-08 + ds 2008-04-09 properties: COLUMN_STATS_ACCURATE true - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small - numFiles 2 + name default.bucket_big + numFiles 4 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 114 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small + name default.bucket_big partition_columns ds partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small + name: default.bucket_big + name: default.bucket_big Truncated Path -> Alias: - /bucket_small/ds=2008-04-08 [a] - Reducer 2 + /bucket_big/ds=2008-04-08 [b] + /bucket_big/ds=2008-04-09 [b] + Reducer 3 Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -464,47 +462,27 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - 
condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Estimated key counts: Map 3 => 1 - keys: - 0 key (type: string) - 1 key (type: string) - input vertices: - 0 Map 3 - Position of Big Table: 1 - Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -517,51 +495,91 @@ STAGE PLANS: ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true - bucket_count 4 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big - numFiles 4 + name default.bucket_small + numFiles 2 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - bucket_count 4 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big + name default.bucket_small partition_columns ds partition_columns.types string - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_big - name: default.bucket_big + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [a] + Map 2 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Estimated key counts: Map 1 => 1 + keys: + 0 key (type: string) + 1 key (type: string) + input vertices: + 0 Map 1 + Position of Big Table: 1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num 
rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 + base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-09 + ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true bucket_count 4 @@ -602,79 +620,57 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [b] - /bucket_big/ds=2008-04-09 [b] - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 + base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-08 + ds 2008-04-09 properties: COLUMN_STATS_ACCURATE true - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small - numFiles 2 + name default.bucket_big + numFiles 4 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 114 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small + name default.bucket_big partition_columns ds partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small + name: default.bucket_big + name: default.bucket_big Truncated Path -> Alias: - /bucket_small/ds=2008-04-08 [a] - Reducer 2 + /bucket_big/ds=2008-04-08 [b] + /bucket_big/ds=2008-04-09 [b] + Reducer 3 Needs Tagging: false 
Reduce Operator Tree: Group By Operator @@ -782,11 +778,80 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### - Vertices: - Map 1 + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 114 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [a] + Map 2 Map Operator Tree: TableScan alias: b @@ -802,27 +867,25 @@ STAGE PLANS: condition expressions: 0 1 - Estimated key counts: Map 3 => 1 + Estimated key counts: Map 1 => 1 keys: 0 key (type: string) 1 key (type: string) input vertices: - 0 Map 3 + 0 Map 1 Position of Big Table: 1 Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: 
bigint) - auto parallelism: false + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -923,76 +986,7 @@ STAGE PLANS: Truncated Path -> Alias: /bucket_big/ds=2008-04-08 [b] /bucket_big/ds=2008-04-09 [b] - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: ds=2008-04-08 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - properties: - COLUMN_STATS_ACCURATE true - bucket_count 2 - bucket_field_name key - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.bucket_small - numFiles 2 - numRows 0 - partition_columns ds - partition_columns.types string - rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 114 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count 2 - bucket_field_name key - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.bucket_small - partition_columns ds - partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small - Truncated Path -> Alias: - /bucket_small/ds=2008-04-08 [a] - Reducer 2 + Reducer 3 Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -1115,13 +1109,82 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path 
-> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 114 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [a] + Map 2 + Map Operator Tree: + TableScan alias: b Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE GatherStats: false @@ -1137,29 +1200,27 @@ STAGE PLANS: 0 1 2 - Estimated key counts: Map 4 => 1, Map 3 => 58 + Estimated key counts: Map 1 => 1, Map 4 => 58 keys: 0 key (type: string) 1 key (type: string) 2 key (type: string) input vertices: - 0 Map 4 - 2 Map 3 + 0 Map 1 + 2 Map 4 Position of Big Table: 1 Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1260,7 +1321,7 @@ STAGE PLANS: Truncated Path -> Alias: /bucket_big/ds=2008-04-08 [b] /bucket_big/ds=2008-04-09 [b] - Map 3 + Map 4 Map Operator Tree: TableScan alias: c @@ -1377,76 +1438,7 @@ STAGE PLANS: Truncated Path -> Alias: /bucket_big/ds=2008-04-08 [c] /bucket_big/ds=2008-04-09 [c] - Map 4 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 114 Basic stats: 
COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: ds=2008-04-08 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - properties: - COLUMN_STATS_ACCURATE true - bucket_count 2 - bucket_field_name key - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.bucket_small - numFiles 2 - numRows 0 - partition_columns ds - partition_columns.types string - rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 114 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count 2 - bucket_field_name key - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.bucket_small - partition_columns ds - partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small - Truncated Path -> Alias: - /bucket_small/ds=2008-04-08 [a] - Reducer 2 + Reducer 3 Needs Tagging: false Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_12.q.out b/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_12.q.out index ce98312..7e62db9 100644 --- a/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_12.q.out +++ b/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_12.q.out @@ -138,7 +138,7 @@ POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket3out POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_medium@ds=2008-04-08 -Warning: Map Join MAPJOIN[30][bigTable=?] in task 'Map 3' is a cross product +Warning: Map Join MAPJOIN[33][bigTable=?] 
in task 'Map 3' is a cross product PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key @@ -211,21 +211,28 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 3 <- Map 1 (BROADCAST_EDGE), Map 2 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE) + Map 3 <- Map 2 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE) Reducer 4 <- Map 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: d - Statistics: Num rows: 0 Data size: 170 Basic stats: PARTIAL Column stats: NONE + alias: a + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Reduce Output Operator - sort order: - Statistics: Num rows: 0 Data size: 170 Basic stats: PARTIAL Column stats: NONE - tag: 1 - auto parallelism: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -238,22 +245,22 @@ STAGE PLANS: ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true - bucket_count 3 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_medium - numFiles 3 + name default.bucket_small + numFiles 2 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_medium { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 170 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -261,24 +268,24 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 3 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_medium + name default.bucket_small partition_columns ds partition_columns.types string - serialization.ddl struct bucket_medium { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_medium - name: default.bucket_medium + name: default.bucket_small + name: default.bucket_small Truncated Path -> Alias: - /bucket_medium/ds=2008-04-08 [d] + /bucket_small/ds=2008-04-08 [a] Map 2 Map Operator Tree: TableScan @@ -289,13 +296,28 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - 
key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: true + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 {key} + Estimated key counts: Map 1 => 1 + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col6 + input vertices: + 0 Map 1 + Position of Big Table: 1 + Statistics: Num rows: 1 Data size: 125 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col6 (type: string), _col6 (type: string) + sort order: ++ + Map-reduce partition columns: _col6 (type: string), _col6 (type: string) + Statistics: Num rows: 1 Data size: 125 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -362,21 +384,17 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 condition expressions: 0 1 - 2 - Estimated key counts: Map 5 => 1, Map 2 => 1 + Estimated key counts: Map 2 => 1 keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) + 0 _col6 (type: string), _col6 (type: string) + 1 key (type: string), key (type: string) input vertices: - 0 Map 5 - 1 Map 2 - Position of Big Table: 2 - Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE + 0 Map 2 + Position of Big Table: 1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -387,22 +405,20 @@ STAGE PLANS: 0 1 input vertices: - 1 Map 1 + 1 Map 5 Position of Big Table: 0 - Statistics: Num rows: 139 Data size: 14064 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 139 Data size: 14064 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Statistics: Num rows: 69 Data size: 7032 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -508,20 +524,14 @@ STAGE PLANS: Map 5 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + alias: d + Statistics: Num rows: 0 Data size: 170 Basic stats: PARTIAL Column stats: NONE GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: true + Reduce Output Operator + sort order: + Statistics: Num rows: 0 Data size: 170 Basic stats: PARTIAL Column stats: NONE + tag: 1 + auto 
parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -534,22 +544,22 @@ STAGE PLANS: ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true - bucket_count 2 + bucket_count 3 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small - numFiles 2 + name default.bucket_medium + numFiles 3 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_medium { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 114 + totalSize 170 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -557,24 +567,24 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 3 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small + name default.bucket_medium partition_columns ds partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_medium { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small + name: default.bucket_medium + name: default.bucket_medium Truncated Path -> Alias: - /bucket_small/ds=2008-04-08 [a] + /bucket_medium/ds=2008-04-08 [d] Reducer 4 Needs Tagging: false Reduce Operator Tree: @@ -615,7 +625,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[30][bigTable=?] in task 'Map 3' is a cross product +Warning: Map Join MAPJOIN[33][bigTable=?] 
in task 'Map 3' is a cross product PREHOOK: query: select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key PREHOOK: type: QUERY PREHOOK: Input: default@bucket_big diff --git a/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_14.q.out b/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_14.q.out index c594c85..11fe42e 100644 --- a/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_14.q.out +++ b/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_14.q.out @@ -50,23 +50,13 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Map 2 - Map Operator Tree: - TableScan alias: a Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -79,20 +69,28 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) input vertices: - 1 Map 1 + 1 Map 3 Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 + value expressions: _col0 (type: bigint) + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -154,13 +152,23 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Map 2 + Map Operator Tree: + TableScan alias: b Statistics: Num rows: 189 Data size: 1891 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -173,30 +181,18 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) input vertices: - 0 Map 3 + 0 Map 1 Statistics: Num rows: 207 Data size: 2080 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: 
Num rows: 207 Data size: 2080 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + value expressions: _col0 (type: bigint) + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_15.q.out b/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_15.q.out index 255b185..f48af9a 100644 --- a/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_15.q.out +++ b/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_15.q.out @@ -48,23 +48,13 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Map 2 - Map Operator Tree: - TableScan alias: a Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -77,20 +67,28 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) input vertices: - 1 Map 1 + 1 Map 3 Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 + value expressions: _col0 (type: bigint) + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -129,13 +127,23 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) 
+ Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Map 2 + Map Operator Tree: + TableScan alias: b Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -148,30 +156,18 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) input vertices: - 0 Map 3 + 0 Map 1 Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + value expressions: _col0 (type: bigint) + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_2.q.out b/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_2.q.out index ebcffcb..25887d1 100644 --- a/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_2.q.out +++ b/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_2.q.out @@ -129,27 +129,45 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 54 Data size: 5500 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: true + Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Estimated key counts: Map 3 => 1 + keys: + 0 key (type: string) + 1 key (type: string) + input vertices: + 1 Map 3 + Position of Big Table: 0 + Statistics: Num rows: 29 Data size: 3025 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: 
count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -162,22 +180,22 @@ STAGE PLANS: ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true - bucket_count 4 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small - numFiles 4 + name default.bucket_big + numFiles 2 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -185,71 +203,29 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small + name default.bucket_big partition_columns ds partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small - Truncated Path -> Alias: - /bucket_small/ds=2008-04-08 [b] - Map 2 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 54 Data size: 5500 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Estimated key counts: Map 1 => 1 - keys: - 0 key (type: string) - 1 key (type: string) - input vertices: - 1 Map 1 - Position of Big Table: 0 - Statistics: Num rows: 29 Data size: 3025 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 29 Data size: 3025 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: + name: default.bucket_big + name: default.bucket_big #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 + base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-08 + ds 2008-04-09 properties: COLUMN_STATS_ACCURATE 
true bucket_count 2 @@ -291,31 +267,54 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big + Truncated Path -> Alias: + /bucket_big/ds=2008-04-08 [a] + /bucket_big/ds=2008-04-09 [a] + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 + base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-09 + ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big - numFiles 2 + name default.bucket_small + numFiles 4 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 226 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -323,26 +322,25 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big + name default.bucket_small partition_columns ds partition_columns.types string - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_big - name: default.bucket_big + name: default.bucket_small + name: default.bucket_small Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [a] - /bucket_big/ds=2008-04-09 [a] - Reducer 3 + /bucket_small/ds=2008-04-08 [b] + Reducer 2 Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -445,27 +443,45 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 54 Data size: 5500 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 113 Basic 
stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: true + Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Estimated key counts: Map 3 => 1 + keys: + 0 key (type: string) + 1 key (type: string) + input vertices: + 1 Map 3 + Position of Big Table: 0 + Statistics: Num rows: 29 Data size: 3025 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -478,22 +494,22 @@ STAGE PLANS: ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true - bucket_count 4 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small - numFiles 4 + name default.bucket_big + numFiles 2 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -501,71 +517,29 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small + name default.bucket_big partition_columns ds partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small - Truncated Path -> Alias: - /bucket_small/ds=2008-04-08 [b] - Map 2 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 54 Data size: 5500 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Estimated key counts: Map 1 => 1 - keys: - 0 key (type: string) - 1 key (type: string) - input vertices: - 1 Map 1 - Position of Big Table: 0 - Statistics: Num rows: 29 Data size: 3025 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 29 Data size: 3025 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - 
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: + name: default.bucket_big + name: default.bucket_big #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 + base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-08 + ds 2008-04-09 properties: COLUMN_STATS_ACCURATE true bucket_count 2 @@ -607,31 +581,54 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big + Truncated Path -> Alias: + /bucket_big/ds=2008-04-08 [a] + /bucket_big/ds=2008-04-09 [a] + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 + base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-09 + ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big - numFiles 2 + name default.bucket_small + numFiles 4 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 226 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -639,26 +636,25 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big + name default.bucket_small partition_columns ds partition_columns.types string - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_big - name: default.bucket_big + name: default.bucket_small + name: default.bucket_small Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [a] - /bucket_big/ds=2008-04-09 [a] - 
Reducer 3 + /bucket_small/ds=2008-04-08 [b] + Reducer 2 Needs Tagging: false Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_3.q.out b/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_3.q.out index af5f1ce..d035a36 100644 --- a/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_3.q.out +++ b/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_3.q.out @@ -129,47 +129,27 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Estimated key counts: Map 3 => 1 - keys: - 0 key (type: string) - 1 key (type: string) - input vertices: - 0 Map 3 - Position of Big Table: 1 - Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -182,22 +162,22 @@ STAGE PLANS: ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true - bucket_count 4 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big - numFiles 4 + name default.bucket_small + numFiles 2 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -205,51 +185,29 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big + name default.bucket_small partition_columns ds partition_columns.types string - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, 
string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_big - name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [b] - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: + name: default.bucket_small + name: default.bucket_small #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 + base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-08 + ds 2008-04-09 properties: COLUMN_STATS_ACCURATE true bucket_count 2 @@ -291,31 +249,72 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_small name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [a] + /bucket_small/ds=2008-04-09 [a] + Map 2 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Estimated key counts: Map 1 => 1 + keys: + 0 key (type: string) + 1 key (type: string) + input vertices: + 0 Map 1 + Position of Big Table: 1 + Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 + base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-09 + ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small - numFiles 2 + name default.bucket_big + numFiles 4 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - 
totalSize 114 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -323,26 +322,25 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small + name default.bucket_big partition_columns ds partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small + name: default.bucket_big + name: default.bucket_big Truncated Path -> Alias: - /bucket_small/ds=2008-04-08 [a] - /bucket_small/ds=2008-04-09 [a] - Reducer 2 + /bucket_big/ds=2008-04-08 [b] + Reducer 3 Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -443,27 +441,45 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: true + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Estimated key counts: Map 3 => 1 + keys: + 0 key (type: string) + 1 key (type: string) + input vertices: + 1 Map 3 + Position of Big Table: 0 + Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -476,22 +492,22 @@ STAGE PLANS: ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small - numFiles 2 + name default.bucket_big + numFiles 4 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 114 + 
totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -499,29 +515,51 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small + name default.bucket_big partition_columns ds partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small + name: default.bucket_big + name: default.bucket_big + Truncated Path -> Alias: + /bucket_big/ds=2008-04-08 [a] + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 + base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-09 + ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true bucket_count 2 @@ -563,74 +601,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_small name: default.bucket_small - Truncated Path -> Alias: - /bucket_small/ds=2008-04-08 [b] - /bucket_small/ds=2008-04-09 [b] - Map 2 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Estimated key counts: Map 1 => 1 - keys: - 0 key (type: string) - 1 key (type: string) - input vertices: - 1 Map 1 - Position of Big Table: 0 - Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 + base file name: ds=2008-04-09 input format: 
org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-08 + ds 2008-04-09 properties: COLUMN_STATS_ACCURATE true - bucket_count 4 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big - numFiles 4 + name default.bucket_small + numFiles 2 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -638,25 +633,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big + name default.bucket_small partition_columns ds partition_columns.types string - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_big - name: default.bucket_big + name: default.bucket_small + name: default.bucket_small Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [a] - Reducer 3 + /bucket_small/ds=2008-04-08 [b] + /bucket_small/ds=2008-04-09 [b] + Reducer 2 Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -757,27 +753,45 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: true + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Estimated key counts: Map 3 => 1 + keys: + 0 key (type: string) + 1 key (type: string) + input vertices: + 1 Map 3 + Position of Big Table: 0 + Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + 
auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -790,22 +804,22 @@ STAGE PLANS: ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small - numFiles 2 + name default.bucket_big + numFiles 4 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 114 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -813,29 +827,51 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small + name default.bucket_big partition_columns ds partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small + name: default.bucket_big + name: default.bucket_big + Truncated Path -> Alias: + /bucket_big/ds=2008-04-08 [a] + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 + base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-09 + ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true bucket_count 2 @@ -877,74 +913,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_small name: default.bucket_small - Truncated Path -> Alias: - /bucket_small/ds=2008-04-08 [b] - /bucket_small/ds=2008-04-09 [b] - Map 2 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Estimated key counts: Map 1 => 1 - keys: - 0 key (type: string) - 1 key (type: string) - input vertices: - 1 Map 1 - Position of Big Table: 0 - Statistics: Num 
rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 + base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-08 + ds 2008-04-09 properties: COLUMN_STATS_ACCURATE true - bucket_count 4 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big - numFiles 4 + name default.bucket_small + numFiles 2 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -952,25 +945,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big + name default.bucket_small partition_columns ds partition_columns.types string - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_big - name: default.bucket_big + name: default.bucket_small + name: default.bucket_small Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [a] - Reducer 3 + /bucket_small/ds=2008-04-08 [b] + /bucket_small/ds=2008-04-09 [b] + Reducer 2 Needs Tagging: false Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_4.q.out b/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_4.q.out index d12da62..889ef64 100644 --- a/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_4.q.out +++ b/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_4.q.out @@ -145,47 +145,27 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 4 Data size: 452 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: 
Num rows: 14 Data size: 1425 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Estimated key counts: Map 3 => 2 - keys: - 0 key (type: string) - 1 key (type: string) - input vertices: - 0 Map 3 - Position of Big Table: 1 - Statistics: Num rows: 15 Data size: 1567 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 15 Data size: 1567 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -198,22 +178,22 @@ STAGE PLANS: ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big - numFiles 2 + name default.bucket_small + numFiles 4 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 226 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -221,51 +201,29 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big + name default.bucket_small partition_columns ds partition_columns.types string - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_big - name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [b] - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 4 Data size: 452 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: + name: default.bucket_small + name: default.bucket_small #### A 
masked pattern was here ####
            Partition
-             base file name: ds=2008-04-08
+             base file name: ds=2008-04-09
              input format: org.apache.hadoop.mapred.TextInputFormat
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              partition values:
-               ds 2008-04-08
+               ds 2008-04-09
              properties:
                COLUMN_STATS_ACCURATE true
                bucket_count 4
@@ -307,31 +265,72 @@ STAGE PLANS:
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              name: default.bucket_small
            name: default.bucket_small
+         Truncated Path -> Alias:
+           /bucket_small/ds=2008-04-08 [a]
+           /bucket_small/ds=2008-04-09 [a]
+       Map 2
+         Map Operator Tree:
+             TableScan
+               alias: b
+               Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE
+               GatherStats: false
+               Filter Operator
+                 isSamplingPred: false
+                 predicate: key is not null (type: boolean)
+                 Statistics: Num rows: 14 Data size: 1425 Basic stats: COMPLETE Column stats: NONE
+                 Map Join Operator
+                   condition map:
+                        Inner Join 0 to 1
+                   condition expressions:
+                     0
+                     1
+                   Estimated key counts: Map 1 => 2
+                   keys:
+                     0 key (type: string)
+                     1 key (type: string)
+                   input vertices:
+                     0 Map 1
+                   Position of Big Table: 1
+                   Statistics: Num rows: 15 Data size: 1567 Basic stats: COMPLETE Column stats: NONE
+                   Group By Operator
+                     aggregations: count()
+                     mode: hash
+                     outputColumnNames: _col0
+                     Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                     Reduce Output Operator
+                       sort order:
+                       Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                       tag: -1
+                       value expressions: _col0 (type: bigint)
+                       auto parallelism: false
+         Path -> Alias:
+#### A masked pattern was here ####
+         Path -> Partition:
#### A masked pattern was here ####
            Partition
-             base file name: ds=2008-04-09
+             base file name: ds=2008-04-08
              input format: org.apache.hadoop.mapred.TextInputFormat
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              partition values:
-               ds 2008-04-09
+               ds 2008-04-08
              properties:
                COLUMN_STATS_ACCURATE true
-               bucket_count 4
+               bucket_count 2
                bucket_field_name key
                columns key,value
                columns.comments
                columns.types string:string
#### A masked pattern was here ####
-               name default.bucket_small
-               numFiles 4
+               name default.bucket_big
+               numFiles 2
                numRows 0
                partition_columns ds
                partition_columns.types string
                rawDataSize 0
-               serialization.ddl struct bucket_small { string key, string value}
+               serialization.ddl struct bucket_big { string key, string value}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-               totalSize 226
+               totalSize 2750
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -339,26 +338,25 @@ STAGE PLANS:
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                SORTBUCKETCOLSPREFIX TRUE
-               bucket_count 4
+               bucket_count 2
                bucket_field_name key
                columns key,value
                columns.comments
                columns.types string:string
#### A masked pattern was here ####
-               name default.bucket_small
+               name default.bucket_big
                partition_columns ds
                partition_columns.types string
-               serialization.ddl struct bucket_small { string key, string value}
+               serialization.ddl struct bucket_big { string key, string value}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-             name: default.bucket_small
-           name: default.bucket_small
+             name: default.bucket_big
+           name: default.bucket_big
          Truncated Path -> Alias:
-           /bucket_small/ds=2008-04-08 [a]
-           /bucket_small/ds=2008-04-09 [a]
-       Reducer 2
+           /bucket_big/ds=2008-04-08 [b]
+       Reducer 3
            Needs Tagging: false
            Reduce Operator Tree:
              Group By Operator
@@ -459,27 +457,45 @@ STAGE PLANS:
  Stage: Stage-1
    Tez
      Edges:
-       Map 2 <- Map 1 (BROADCAST_EDGE)
-       Reducer 3 <- Map 2 (SIMPLE_EDGE)
+       Map 1 <- Map 3 (BROADCAST_EDGE)
+       Reducer 2 <- Map 1 (SIMPLE_EDGE)
#### A masked pattern was here ####
      Vertices:
        Map 1
          Map Operator Tree:
              TableScan
-               alias: b
-               Statistics: Num rows: 4 Data size: 452 Basic stats: COMPLETE Column stats: NONE
+               alias: a
+               Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE
                GatherStats: false
                Filter Operator
                  isSamplingPred: false
                  predicate: key is not null (type: boolean)
-                 Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE
-                 Reduce Output Operator
-                   key expressions: key (type: string)
-                   sort order: +
-                   Map-reduce partition columns: key (type: string)
-                   Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE
-                   tag: 1
-                   auto parallelism: true
+                 Statistics: Num rows: 14 Data size: 1425 Basic stats: COMPLETE Column stats: NONE
+                 Map Join Operator
+                   condition map:
+                        Inner Join 0 to 1
+                   condition expressions:
+                     0
+                     1
+                   Estimated key counts: Map 3 => 2
+                   keys:
+                     0 key (type: string)
+                     1 key (type: string)
+                   input vertices:
+                     1 Map 3
+                   Position of Big Table: 0
+                   Statistics: Num rows: 15 Data size: 1567 Basic stats: COMPLETE Column stats: NONE
+                   Group By Operator
+                     aggregations: count()
+                     mode: hash
+                     outputColumnNames: _col0
+                     Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                     Reduce Output Operator
+                       sort order:
+                       Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                       tag: -1
+                       value expressions: _col0 (type: bigint)
+                       auto parallelism: false
          Path -> Alias:
#### A masked pattern was here ####
          Path -> Partition:
@@ -492,22 +508,22 @@ STAGE PLANS:
                ds 2008-04-08
              properties:
                COLUMN_STATS_ACCURATE true
-               bucket_count 4
+               bucket_count 2
                bucket_field_name key
                columns key,value
                columns.comments
                columns.types string:string
#### A masked pattern was here ####
-               name default.bucket_small
-               numFiles 4
+               name default.bucket_big
+               numFiles 2
                numRows 0
                partition_columns ds
                partition_columns.types string
                rawDataSize 0
-               serialization.ddl struct bucket_small { string key, string value}
+               serialization.ddl struct bucket_big { string key, string value}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-               totalSize 226
+               totalSize 2750
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -515,29 +531,51 @@ STAGE PLANS:
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                SORTBUCKETCOLSPREFIX TRUE
-               bucket_count 4
+               bucket_count 2
                bucket_field_name key
                columns key,value
                columns.comments
                columns.types string:string
#### A masked pattern was here ####
-               name default.bucket_small
+               name default.bucket_big
                partition_columns ds
                partition_columns.types string
-               serialization.ddl struct bucket_small { string key, string value}
+               serialization.ddl struct bucket_big { string key, string value}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-             name: default.bucket_small
-           name: default.bucket_small
+             name: default.bucket_big
+           name: default.bucket_big
+         Truncated Path -> Alias:
+           /bucket_big/ds=2008-04-08 [a]
+       Map 3
+         Map Operator Tree:
+             TableScan
+               alias: b
+               Statistics: Num rows: 4 Data size: 452 Basic stats: COMPLETE Column stats: NONE
+               GatherStats: false
+               Filter Operator
+                 isSamplingPred: false
+                 predicate: key is not null (type: boolean)
+                 Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE
+                 Reduce Output Operator
+                   key expressions: key (type: string)
+                   sort order: +
+                   Map-reduce partition columns: key (type: string)
+                   Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE
+                   tag: 1
+                   auto parallelism: true
+         Path -> Alias:
+#### A masked pattern was here ####
+         Path -> Partition:
#### A masked pattern was here ####
            Partition
-             base file name: ds=2008-04-09
+             base file name: ds=2008-04-08
              input format: org.apache.hadoop.mapred.TextInputFormat
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              partition values:
-               ds 2008-04-09
+               ds 2008-04-08
              properties:
                COLUMN_STATS_ACCURATE true
                bucket_count 4
@@ -579,74 +617,31 @@ STAGE PLANS:
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              name: default.bucket_small
            name: default.bucket_small
-         Truncated Path -> Alias:
-           /bucket_small/ds=2008-04-08 [b]
-           /bucket_small/ds=2008-04-09 [b]
-       Map 2
-         Map Operator Tree:
-             TableScan
-               alias: a
-               Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE
-               GatherStats: false
-               Filter Operator
-                 isSamplingPred: false
-                 predicate: key is not null (type: boolean)
-                 Statistics: Num rows: 14 Data size: 1425 Basic stats: COMPLETE Column stats: NONE
-                 Map Join Operator
-                   condition map:
-                        Inner Join 0 to 1
-                   condition expressions:
-                     0
-                     1
-                   Estimated key counts: Map 1 => 2
-                   keys:
-                     0 key (type: string)
-                     1 key (type: string)
-                   input vertices:
-                     1 Map 1
-                   Position of Big Table: 0
-                   Statistics: Num rows: 15 Data size: 1567 Basic stats: COMPLETE Column stats: NONE
-                   Select Operator
-                     Statistics: Num rows: 15 Data size: 1567 Basic stats: COMPLETE Column stats: NONE
-                     Group By Operator
-                       aggregations: count()
-                       mode: hash
-                       outputColumnNames: _col0
-                       Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                       Reduce Output Operator
-                         sort order:
-                         Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                         tag: -1
-                         value expressions: _col0 (type: bigint)
-                         auto parallelism: false
-         Path -> Alias:
-#### A masked pattern was here ####
-         Path -> Partition:
#### A masked pattern was here ####
            Partition
-             base file name: ds=2008-04-08
+             base file name: ds=2008-04-09
              input format: org.apache.hadoop.mapred.TextInputFormat
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              partition values:
-               ds 2008-04-08
+               ds 2008-04-09
              properties:
                COLUMN_STATS_ACCURATE true
-               bucket_count 2
+               bucket_count 4
                bucket_field_name key
                columns key,value
                columns.comments
                columns.types string:string
#### A masked pattern was here ####
-               name default.bucket_big
-               numFiles 2
+               name default.bucket_small
+               numFiles 4
                numRows 0
                partition_columns ds
                partition_columns.types string
                rawDataSize 0
-               serialization.ddl struct bucket_big { string key, string value}
+               serialization.ddl struct bucket_small { string key, string value}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-               totalSize 2750
+               totalSize 226
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -654,25 +649,26 @@ STAGE PLANS:
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                SORTBUCKETCOLSPREFIX TRUE
-               bucket_count 2
+               bucket_count 4
                bucket_field_name key
                columns key,value
                columns.comments
                columns.types string:string
#### A masked pattern was here ####
-               name default.bucket_big
+               name default.bucket_small
                partition_columns ds
                partition_columns.types string
-               serialization.ddl struct bucket_big { string key, string value}
+               serialization.ddl struct bucket_small { string key, string value}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-             name: default.bucket_big
-           name: default.bucket_big
+             name: default.bucket_small
+           name: default.bucket_small
          Truncated Path -> Alias:
-           /bucket_big/ds=2008-04-08 [a]
-       Reducer 3
+           /bucket_small/ds=2008-04-08 [b]
+           /bucket_small/ds=2008-04-09 [b]
+       Reducer 2
            Needs Tagging: false
            Reduce Operator Tree:
              Group By Operator
@@ -773,27 +769,45 @@ STAGE PLANS:
  Stage: Stage-1
    Tez
      Edges:
-       Map 2 <- Map 1 (BROADCAST_EDGE)
-       Reducer 3 <- Map 2 (SIMPLE_EDGE)
+       Map 1 <- Map 3 (BROADCAST_EDGE)
+       Reducer 2 <- Map 1 (SIMPLE_EDGE)
#### A masked pattern was here ####
      Vertices:
        Map 1
          Map Operator Tree:
              TableScan
-               alias: b
-               Statistics: Num rows: 4 Data size: 452 Basic stats: COMPLETE Column stats: NONE
+               alias: a
+               Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE
                GatherStats: false
                Filter Operator
                  isSamplingPred: false
                  predicate: key is not null (type: boolean)
-                 Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE
-                 Reduce Output Operator
-                   key expressions: key (type: string)
-                   sort order: +
-                   Map-reduce partition columns: key (type: string)
-                   Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE
-                   tag: 1
-                   auto parallelism: true
+                 Statistics: Num rows: 14 Data size: 1425 Basic stats: COMPLETE Column stats: NONE
+                 Map Join Operator
+                   condition map:
+                        Inner Join 0 to 1
+                   condition expressions:
+                     0
+                     1
+                   Estimated key counts: Map 3 => 2
+                   keys:
+                     0 key (type: string)
+                     1 key (type: string)
+                   input vertices:
+                     1 Map 3
+                   Position of Big Table: 0
+                   Statistics: Num rows: 15 Data size: 1567 Basic stats: COMPLETE Column stats: NONE
+                   Group By Operator
+                     aggregations: count()
+                     mode: hash
+                     outputColumnNames: _col0
+                     Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                     Reduce Output Operator
+                       sort order:
+                       Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                       tag: -1
+                       value expressions: _col0 (type: bigint)
+                       auto parallelism: false
          Path -> Alias:
#### A masked pattern was here ####
          Path -> Partition:
@@ -806,22 +820,22 @@ STAGE PLANS:
                ds 2008-04-08
              properties:
                COLUMN_STATS_ACCURATE true
-               bucket_count 4
+               bucket_count 2
                bucket_field_name key
                columns key,value
                columns.comments
                columns.types string:string
#### A masked pattern was here ####
-               name default.bucket_small
-               numFiles 4
+               name default.bucket_big
+               numFiles 2
                numRows 0
                partition_columns ds
                partition_columns.types string
                rawDataSize 0
-               serialization.ddl struct bucket_small { string key, string value}
+               serialization.ddl struct bucket_big { string key, string value}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-               totalSize 226
+               totalSize 2750
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -829,29 +843,51 @@ STAGE PLANS:
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                SORTBUCKETCOLSPREFIX TRUE
-               bucket_count 4
+               bucket_count 2
                bucket_field_name key
                columns key,value
                columns.comments
                columns.types string:string
#### A masked pattern was here ####
-               name default.bucket_small
+               name default.bucket_big
                partition_columns ds
                partition_columns.types string
-               serialization.ddl struct bucket_small { string key, string value}
+               serialization.ddl struct bucket_big { string key, string value}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-             name: default.bucket_small
-           name: default.bucket_small
+             name: default.bucket_big
+           name: default.bucket_big
+         Truncated Path -> Alias:
+           /bucket_big/ds=2008-04-08 [a]
+       Map 3
+         Map Operator Tree:
+             TableScan
+               alias: b
+               Statistics: Num rows: 4 Data size: 452 Basic stats: COMPLETE Column stats: NONE
+               GatherStats: false
+               Filter Operator
+                 isSamplingPred: false
+                 predicate: key is not null (type: boolean)
+                 Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE
+                 Reduce Output Operator
+                   key expressions: key (type: string)
+                   sort order: +
+                   Map-reduce partition columns: key (type: string)
+                   Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE
+                   tag: 1
+                   auto parallelism: true
+         Path -> Alias:
+#### A masked pattern was here ####
+         Path -> Partition:
#### A masked pattern was here ####
            Partition
-             base file name: ds=2008-04-09
+             base file name: ds=2008-04-08
              input format: org.apache.hadoop.mapred.TextInputFormat
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              partition values:
-               ds 2008-04-09
+               ds 2008-04-08
              properties:
                COLUMN_STATS_ACCURATE true
                bucket_count 4
@@ -893,74 +929,31 @@ STAGE PLANS:
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              name: default.bucket_small
            name: default.bucket_small
-         Truncated Path -> Alias:
-           /bucket_small/ds=2008-04-08 [b]
-           /bucket_small/ds=2008-04-09 [b]
-       Map 2
-         Map Operator Tree:
-             TableScan
-               alias: a
-               Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE
-               GatherStats: false
-               Filter Operator
-                 isSamplingPred: false
-                 predicate: key is not null (type: boolean)
-                 Statistics: Num rows: 14 Data size: 1425 Basic stats: COMPLETE Column stats: NONE
-                 Map Join Operator
-                   condition map:
-                        Inner Join 0 to 1
-                   condition expressions:
-                     0
-                     1
-                   Estimated key counts: Map 1 => 2
-                   keys:
-                     0 key (type: string)
-                     1 key (type: string)
-                   input vertices:
-                     1 Map 1
-                   Position of Big Table: 0
-                   Statistics: Num rows: 15 Data size: 1567 Basic stats: COMPLETE Column stats: NONE
-                   Select Operator
-                     Statistics: Num rows: 15 Data size: 1567 Basic stats: COMPLETE Column stats: NONE
-                     Group By Operator
-                       aggregations: count()
-                       mode: hash
-                       outputColumnNames: _col0
-                       Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                       Reduce Output Operator
-                         sort order:
-                         Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                         tag: -1
-                         value expressions: _col0 (type: bigint)
-                         auto parallelism: false
-         Path -> Alias:
-#### A masked pattern was here ####
-         Path -> Partition:
#### A masked pattern was here ####
            Partition
-             base file name: ds=2008-04-08
+             base file name: ds=2008-04-09
              input format: org.apache.hadoop.mapred.TextInputFormat
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              partition values:
-               ds 2008-04-08
+               ds 2008-04-09
              properties:
                COLUMN_STATS_ACCURATE true
-               bucket_count 2
+               bucket_count 4
                bucket_field_name key
                columns key,value
                columns.comments
                columns.types string:string
#### A masked pattern was here ####
-               name default.bucket_big
-               numFiles 2
+               name default.bucket_small
+               numFiles 4
                numRows 0
                partition_columns ds
                partition_columns.types string
                rawDataSize 0
-               serialization.ddl struct bucket_big { string key, string value}
+               serialization.ddl struct bucket_small { string key, string value}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-               totalSize 2750
+               totalSize 226
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -968,25 +961,26 @@ STAGE PLANS:
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                SORTBUCKETCOLSPREFIX TRUE
-               bucket_count 2
+               bucket_count 4
                bucket_field_name key
                columns key,value
                columns.comments
                columns.types string:string
#### A masked pattern was here ####
-               name default.bucket_big
+               name default.bucket_small
                partition_columns ds
                partition_columns.types string
-               serialization.ddl struct bucket_big { string key, string value}
+               serialization.ddl struct bucket_small { string key, string value}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-             name: default.bucket_big
-           name: default.bucket_big
+             name: default.bucket_small
+           name: default.bucket_small
          Truncated Path -> Alias:
-           /bucket_big/ds=2008-04-08 [a]
-       Reducer 3
+           /bucket_small/ds=2008-04-08 [b]
+           /bucket_small/ds=2008-04-09 [b]
+       Reducer 2
            Needs Tagging: false
            Reduce Operator Tree:
              Group By Operator
diff --git a/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_5.q.out b/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_5.q.out
index b42bac7..cb50150 100644
--- a/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_5.q.out
+++ b/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_5.q.out
@@ -110,10 +110,10 @@ STAGE PLANS:
  Stage: Stage-1
    Tez
      Edges:
-       Reducer 2 <- Map 1 (SIMPLE_EDGE)
+       Reducer 3 <- Map 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
      Vertices:
-       Map 1
+       Map 2
          Map Operator Tree:
              TableScan
                alias: a
@@ -191,19 +191,17 @@ STAGE PLANS:
                     1 key (type: string)
                   Position of Big Table: 1
                   Statistics: Num rows: 15 Data size: 1567 Basic stats: COMPLETE Column stats: NONE
-                   Select Operator
-                     Statistics: Num rows: 15 Data size: 1567 Basic stats: COMPLETE Column stats: NONE
-                     Group By Operator
-                       aggregations: count()
-                       mode: hash
-                       outputColumnNames: _col0
+                   Group By Operator
+                     aggregations: count()
+                     mode: hash
+                     outputColumnNames: _col0
+                     Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                     Reduce Output Operator
+                       sort order:
                       Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                       Reduce Output Operator
-                         sort order:
-                         Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                         tag: -1
-                         value expressions: _col0 (type: bigint)
-                         auto parallelism: false
+                       tag: -1
+                       value expressions: _col0 (type: bigint)
+                       auto parallelism: false
          Path -> Alias:
#### A masked pattern was here ####
          Path -> Partition:
@@ -252,7 +250,7 @@ STAGE PLANS:
            name: default.bucket_big
          Truncated Path -> Alias:
            /bucket_big [b]
-       Reducer 2
+       Reducer 3
            Needs Tagging: false
            Reduce Operator Tree:
              Group By Operator
@@ -347,10 +345,10 @@ STAGE PLANS:
  Stage: Stage-1
    Tez
      Edges:
-       Reducer 3 <- Map 2 (SIMPLE_EDGE)
+       Reducer 2 <- Map 1 (SIMPLE_EDGE)
#### A masked pattern was here ####
      Vertices:
-       Map 2
+       Map 1
          Map Operator Tree:
              TableScan
                alias: b
@@ -428,19 +426,17 @@ STAGE PLANS:
                     1 key (type: string)
                   Position of Big Table: 0
                   Statistics: Num rows: 15 Data size: 1567 Basic stats: COMPLETE Column stats: NONE
-                   Select Operator
-                     Statistics: Num rows: 15 Data size: 1567 Basic stats: COMPLETE Column stats: NONE
-                     Group By Operator
-                       aggregations: count()
-                       mode: hash
-                       outputColumnNames: _col0
+                   Group By Operator
+                     aggregations: count()
+                     mode: hash
+                     outputColumnNames: _col0
+                     Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                     Reduce Output Operator
+                       sort order:
                       Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                       Reduce Output Operator
-                         sort order:
-                         Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                         tag: -1
-                         value expressions: _col0 (type: bigint)
-                         auto parallelism: false
+                       tag: -1
+                       value expressions: _col0 (type: bigint)
+                       auto parallelism: false
          Path -> Alias:
#### A masked pattern was here ####
          Path -> Partition:
@@ -489,7 +485,7 @@ STAGE PLANS:
            name: default.bucket_big
          Truncated Path -> Alias:
            /bucket_big [a]
-       Reducer 3
+       Reducer 2
            Needs Tagging: false
            Reduce Operator Tree:
              Group By Operator
@@ -584,50 +580,68 @@ STAGE PLANS:
  Stage: Stage-1
    Tez
      Edges:
-       Map 2 <- Map 1 (BROADCAST_EDGE)
-       Reducer 3 <- Map 2 (SIMPLE_EDGE)
+       Map 1 <- Map 3 (BROADCAST_EDGE)
+       Reducer 2 <- Map 1 (SIMPLE_EDGE)
#### A masked pattern was here ####
      Vertices:
        Map 1
          Map Operator Tree:
              TableScan
-               alias: b
-               Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE
+               alias: a
+               Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE
                GatherStats: false
                Filter Operator
                  isSamplingPred: false
                  predicate: key is not null (type: boolean)
-                 Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: NONE
-                 Reduce Output Operator
-                   key expressions: key (type: string)
-                   sort order: +
-                   Map-reduce partition columns: key (type: string)
-                   Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: NONE
-                   tag: 1
-                   auto parallelism: true
+                 Statistics: Num rows: 14 Data size: 1425 Basic stats: COMPLETE Column stats: NONE
+                 Map Join Operator
+                   condition map:
+                        Inner Join 0 to 1
+                   condition expressions:
+                     0
+                     1
+                   Estimated key counts: Map 3 => 1
+                   keys:
+                     0 key (type: string)
+                     1 key (type: string)
+                   input vertices:
+                     1 Map 3
+                   Position of Big Table: 0
+                   Statistics: Num rows: 15 Data size: 1567 Basic stats: COMPLETE Column stats: NONE
+                   Group By Operator
+                     aggregations: count()
+                     mode: hash
+                     outputColumnNames: _col0
+                     Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                     Reduce Output Operator
+                       sort order:
+                       Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                       tag: -1
+                       value expressions: _col0 (type: bigint)
+                       auto parallelism: false
          Path -> Alias:
#### A masked pattern was here ####
          Path -> Partition:
#### A masked pattern was here ####
            Partition
-             base file name: bucket_small
+             base file name: bucket_big
              input format: org.apache.hadoop.mapred.TextInputFormat
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                COLUMN_STATS_ACCURATE true
                SORTBUCKETCOLSPREFIX TRUE
-               bucket_count 4
+               bucket_count 2
                bucket_field_name key
                columns key,value
                columns.comments
                columns.types string:string
#### A masked pattern was here ####
-               name default.bucket_small
-               numFiles 4
-               serialization.ddl struct bucket_small { string key, string value}
+               name default.bucket_big
+               numFiles 2
+               serialization.ddl struct bucket_big { string key, string value}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-               totalSize 226
+               totalSize 2750
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -636,84 +650,64 @@ STAGE PLANS:
              properties:
                COLUMN_STATS_ACCURATE true
                SORTBUCKETCOLSPREFIX TRUE
-               bucket_count 4
+               bucket_count 2
                bucket_field_name key
                columns key,value
                columns.comments
                columns.types string:string
#### A masked pattern was here ####
-               name default.bucket_small
-               numFiles 4
-               serialization.ddl struct bucket_small { string key, string value}
+               name default.bucket_big
+               numFiles 2
+               serialization.ddl struct bucket_big { string key, string value}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-               totalSize 226
+               totalSize 2750
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-             name: default.bucket_small
-           name: default.bucket_small
+             name: default.bucket_big
+           name: default.bucket_big
          Truncated Path -> Alias:
-           /bucket_small [b]
-       Map 2
+           /bucket_big [a]
+       Map 3
          Map Operator Tree:
              TableScan
-               alias: a
-               Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE
+               alias: b
+               Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE
                GatherStats: false
                Filter Operator
                  isSamplingPred: false
                  predicate: key is not null (type: boolean)
-                 Statistics: Num rows: 14 Data size: 1425 Basic stats: COMPLETE Column stats: NONE
-                 Map Join Operator
-                   condition map:
-                        Inner Join 0 to 1
-                   condition expressions:
-                     0
-                     1
-                   Estimated key counts: Map 1 => 1
-                   keys:
-                     0 key (type: string)
-                     1 key (type: string)
-                   input vertices:
-                     1 Map 1
-                   Position of Big Table: 0
-                   Statistics: Num rows: 15 Data size: 1567 Basic stats: COMPLETE Column stats: NONE
-                   Select Operator
-                     Statistics: Num rows: 15 Data size: 1567 Basic stats: COMPLETE Column stats: NONE
-                     Group By Operator
-                       aggregations: count()
-                       mode: hash
-                       outputColumnNames: _col0
-                       Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                       Reduce Output Operator
-                         sort order:
-                         Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                         tag: -1
-                         value expressions: _col0 (type: bigint)
-                         auto parallelism: false
+                 Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: NONE
+                 Reduce Output Operator
+                   key expressions: key (type: string)
+                   sort order: +
+                   Map-reduce partition columns: key (type: string)
+                   Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: NONE
+                   tag: 1
+                   auto parallelism: true
          Path -> Alias:
#### A masked pattern was here ####
          Path -> Partition:
#### A masked pattern was here ####
            Partition
-             base file name: bucket_big
+             base file name: bucket_small
              input format: org.apache.hadoop.mapred.TextInputFormat
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                COLUMN_STATS_ACCURATE true
                SORTBUCKETCOLSPREFIX TRUE
-               bucket_count 2
+               bucket_count 4
                bucket_field_name key
                columns key,value
                columns.comments
                columns.types string:string
#### A masked pattern was here ####
-               name default.bucket_big
-               numFiles 2
-               serialization.ddl struct bucket_big { string key, string value}
+               name default.bucket_small
+               numFiles 4
+               serialization.ddl struct bucket_small { string key, string value}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-               totalSize 2750
+               totalSize 226
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -722,25 +716,25 @@ STAGE PLANS:
              properties:
                COLUMN_STATS_ACCURATE true
                SORTBUCKETCOLSPREFIX TRUE
-               bucket_count 2
+               bucket_count 4
                bucket_field_name key
                columns key,value
                columns.comments
                columns.types string:string
#### A masked pattern was here ####
-               name default.bucket_big
-               numFiles 2
-               serialization.ddl struct bucket_big { string key, string value}
+               name default.bucket_small
+               numFiles 4
+               serialization.ddl struct bucket_small { string key, string value}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-               totalSize 2750
+               totalSize 226
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-             name: default.bucket_big
-           name: default.bucket_big
+             name: default.bucket_small
+           name: default.bucket_small
          Truncated Path -> Alias:
-           /bucket_big [a]
-       Reducer 3
+           /bucket_small [b]
+       Reducer 2
            Needs Tagging: false
            Reduce Operator Tree:
              Group By Operator
diff --git a/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_7.q.out b/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_7.q.out
index 424f191..f025365 100644
--- a/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_7.q.out
+++ b/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_7.q.out
@@ -162,47 +162,27 @@ STAGE PLANS:
  Stage: Stage-1
    Tez
      Edges:
-       Map 1 <- Map 3 (BROADCAST_EDGE)
-       Reducer 2 <- Map 1 (SIMPLE_EDGE)
+       Map 2 <- Map 1 (BROADCAST_EDGE)
+       Reducer 3 <- Map 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
      Vertices:
        Map 1
          Map Operator Tree:
              TableScan
-               alias: b
-               Statistics: Num rows: 54 Data size: 5500 Basic stats: COMPLETE Column stats: NONE
+               alias: a
+               Statistics: Num rows: 4 Data size: 452 Basic stats: COMPLETE Column stats: NONE
                GatherStats: false
                Filter Operator
                  isSamplingPred: false
                  predicate: key is not null (type: boolean)
-                 Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE
-                 Map Join Operator
-                   condition map:
-                        Inner Join 0 to 1
-                   condition expressions:
-                     0
-                     1
-                   Estimated key counts: Map 3 => 2
-                   keys:
-                     0 key (type: string)
-                     1 key (type: string)
-                   input vertices:
-                     0 Map 3
-                   Position of Big Table: 1
-                   Statistics: Num rows: 29 Data size: 3025 Basic stats: COMPLETE Column stats: NONE
-                   Select Operator
-                     Statistics: Num rows: 29 Data size: 3025 Basic stats: COMPLETE Column stats: NONE
-                     Group By Operator
-                       aggregations: count()
-                       mode: hash
-                       outputColumnNames: _col0
-                       Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                       Reduce Output Operator
-                         sort order:
-                         Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                         tag: -1
-                         value expressions: _col0 (type: bigint)
-                         auto parallelism: false
+                 Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE
+                 Reduce Output Operator
+                   key expressions: key (type: string)
+                   sort order: +
+                   Map-reduce partition columns: key (type: string)
+                   Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE
+                   tag: 0
+                   auto parallelism: true
          Path -> Alias:
#### A masked pattern was here ####
          Path -> Partition:
@@ -215,22 +195,22 @@ STAGE PLANS:
                ds 2008-04-08
              properties:
                COLUMN_STATS_ACCURATE true
-               bucket_count 2
+               bucket_count 4
                bucket_field_name key
                columns key,value
                columns.comments
                columns.types string:string
#### A masked pattern was here ####
-               name default.bucket_big
-               numFiles 2
+               name default.bucket_small
+               numFiles 4
                numRows 0
                partition_columns ds
                partition_columns.types string
                rawDataSize 0
-               serialization.ddl struct bucket_big { string key, string value}
+               serialization.ddl struct bucket_small { string key, string value}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-               totalSize 2750
+               totalSize 226
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -238,22 +218,22 @@ STAGE PLANS:
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                SORTBUCKETCOLSPREFIX TRUE
-               bucket_count 2
+               bucket_count 4
                bucket_field_name key
                columns key,value
                columns.comments
                columns.types string:string
#### A masked pattern was here ####
-               name default.bucket_big
+               name default.bucket_small
                partition_columns ds
                partition_columns.types string
-               serialization.ddl struct bucket_big { string key, string value}
+               serialization.ddl struct bucket_small { string key, string value}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-             name: default.bucket_big
-           name: default.bucket_big
+             name: default.bucket_small
+           name: default.bucket_small
#### A masked pattern was here ####
            Partition
              base file name: ds=2008-04-09
@@ -263,22 +243,22 @@ STAGE PLANS:
                ds 2008-04-09
              properties:
                COLUMN_STATS_ACCURATE true
-               bucket_count 2
+               bucket_count 4
                bucket_field_name key
                columns key,value
                columns.comments
                columns.types string:string
#### A masked pattern was here ####
-               name default.bucket_big
-               numFiles 2
+               name default.bucket_small
+               numFiles 4
                numRows 0
                partition_columns ds
                partition_columns.types string
                rawDataSize 0
-               serialization.ddl struct bucket_big { string key, string value}
+               serialization.ddl struct bucket_small { string key, string value}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-               totalSize 2750
+               totalSize 226
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -286,42 +266,60 @@ STAGE PLANS:
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                SORTBUCKETCOLSPREFIX TRUE
-               bucket_count 2
+               bucket_count 4
                bucket_field_name key
                columns key,value
                columns.comments
                columns.types string:string
#### A masked pattern was here ####
-               name default.bucket_big
+               name default.bucket_small
                partition_columns ds
                partition_columns.types string
-               serialization.ddl struct bucket_big { string key, string value}
+               serialization.ddl struct bucket_small { string key, string value}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-             name: default.bucket_big
-           name: default.bucket_big
+             name: default.bucket_small
+           name: default.bucket_small
          Truncated Path -> Alias:
-           /bucket_big/ds=2008-04-08 [b]
-           /bucket_big/ds=2008-04-09 [b]
-       Map 3
+           /bucket_small/ds=2008-04-08 [a]
+           /bucket_small/ds=2008-04-09 [a]
+       Map 2
          Map Operator Tree:
              TableScan
-               alias: a
-               Statistics: Num rows: 4 Data size: 452 Basic stats: COMPLETE Column stats: NONE
+               alias: b
+               Statistics: Num rows: 54 Data size: 5500 Basic stats: COMPLETE Column stats: NONE
                GatherStats: false
                Filter Operator
                  isSamplingPred: false
                  predicate: key is not null (type: boolean)
-                 Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE
-                 Reduce Output Operator
-                   key expressions: key (type: string)
-                   sort order: +
-                   Map-reduce partition columns: key (type: string)
-                   Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE
-                   tag: 0
-                   auto parallelism: true
+                 Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE
+                 Map Join Operator
+                   condition map:
+                        Inner Join 0 to 1
+                   condition expressions:
+                     0
+                     1
+                   Estimated key counts: Map 1 => 2
+                   keys:
+                     0 key (type: string)
+                     1 key (type: string)
+                   input vertices:
+                     0 Map 1
+                   Position of Big Table: 1
+                   Statistics: Num rows: 29 Data size: 3025 Basic stats: COMPLETE Column stats: NONE
+                   Group By Operator
+                     aggregations: count()
+                     mode: hash
+                     outputColumnNames: _col0
+                     Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                     Reduce Output Operator
+                       sort order:
+                       Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                       tag: -1
+                       value expressions: _col0 (type: bigint)
+                       auto parallelism: false
          Path -> Alias:
#### A masked pattern was here ####
          Path -> Partition:
@@ -334,22 +332,22 @@ STAGE PLANS:
                ds 2008-04-08
              properties:
                COLUMN_STATS_ACCURATE true
-               bucket_count 4
+               bucket_count 2
                bucket_field_name key
                columns key,value
                columns.comments
                columns.types string:string
#### A masked pattern was here ####
-               name default.bucket_small
-               numFiles 4
+               name default.bucket_big
+               numFiles 2
                numRows 0
                partition_columns ds
                partition_columns.types string
                rawDataSize 0
-               serialization.ddl struct bucket_small { string key, string value}
+               serialization.ddl struct bucket_big { string key, string value}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-               totalSize 226
+               totalSize 2750
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -357,22 +355,22 @@ STAGE PLANS:
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                SORTBUCKETCOLSPREFIX TRUE
-               bucket_count 4
+               bucket_count 2
                bucket_field_name key
                columns key,value
                columns.comments
                columns.types string:string
#### A masked pattern was here ####
-               name default.bucket_small
+               name default.bucket_big
                partition_columns ds
                partition_columns.types string
-               serialization.ddl struct bucket_small { string key, string value}
+               serialization.ddl struct bucket_big { string key, string value}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-             name: default.bucket_small
-           name: default.bucket_small
+             name: default.bucket_big
+           name: default.bucket_big
#### A masked pattern was here ####
            Partition
              base file name: ds=2008-04-09
@@ -382,22 +380,22 @@ STAGE PLANS:
                ds 2008-04-09
              properties:
                COLUMN_STATS_ACCURATE true
-               bucket_count 4
+               bucket_count 2
                bucket_field_name key
                columns key,value
                columns.comments
                columns.types string:string
#### A masked pattern was here ####
-               name default.bucket_small
-               numFiles 4
+               name default.bucket_big
+               numFiles 2
                numRows 0
                partition_columns ds
                partition_columns.types string
                rawDataSize 0
-               serialization.ddl struct bucket_small { string key, string value}
+               serialization.ddl struct bucket_big { string key, string value}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-               totalSize 226
+               totalSize 2750
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -405,26 +403,26 @@ STAGE PLANS:
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                SORTBUCKETCOLSPREFIX TRUE
-               bucket_count 4
+               bucket_count 2
                bucket_field_name key
                columns key,value
                columns.comments
                columns.types string:string
#### A masked pattern was here ####
-               name default.bucket_small
+               name default.bucket_big
                partition_columns ds
                partition_columns.types string
-               serialization.ddl struct bucket_small { string key, string value}
+               serialization.ddl struct bucket_big { string key, string value}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-             name: default.bucket_small
-           name: default.bucket_small
+             name: default.bucket_big
+           name: default.bucket_big
          Truncated Path -> Alias:
-           /bucket_small/ds=2008-04-08 [a]
-           /bucket_small/ds=2008-04-09 [a]
-       Reducer 2
+           /bucket_big/ds=2008-04-08 [b]
+           /bucket_big/ds=2008-04-09 [b]
+       Reducer 3
            Needs Tagging: false
            Reduce Operator Tree:
              Group By Operator
@@ -527,27 +525,45 @@ STAGE PLANS:
  Stage: Stage-1
    Tez
      Edges:
-       Map 2 <- Map 1 (BROADCAST_EDGE)
-       Reducer 3 <- Map 2 (SIMPLE_EDGE)
+       Map 1 <- Map 3 (BROADCAST_EDGE)
+       Reducer 2 <- Map 1 (SIMPLE_EDGE)
#### A masked pattern was here ####
      Vertices:
        Map 1
          Map Operator Tree:
              TableScan
-               alias: b
-               Statistics: Num rows: 4 Data size: 452 Basic stats: COMPLETE Column stats: NONE
+               alias: a
+               Statistics: Num rows: 54 Data size: 5500 Basic stats: COMPLETE Column stats: NONE
                GatherStats: false
                Filter Operator
                  isSamplingPred: false
                  predicate: key is not null (type: boolean)
-                 Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE
-                 Reduce Output Operator
-                   key expressions: key (type: string)
-                   sort order: +
-                   Map-reduce partition columns: key (type: string)
-                   Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE
-                   tag: 1
-                   auto parallelism: true
+                 Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE
+                 Map Join Operator
+                   condition map:
+                        Inner Join 0 to 1
+                   condition expressions:
+                     0
+                     1
+                   Estimated key counts: Map 3 => 2
+                   keys:
+                     0 key (type: string)
+                     1 key (type: string)
+                   input vertices:
+                     1 Map 3
+                   Position of Big Table: 0
+                   Statistics: Num rows: 29 Data size: 3025 Basic stats: COMPLETE Column stats: NONE
+                   Group By Operator
+                     aggregations: count()
+                     mode: hash
+                     outputColumnNames: _col0
+                     Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                     Reduce Output Operator
+                       sort order:
+                       Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                       tag: -1
+                       value expressions: _col0 (type: bigint)
+                       auto parallelism: false
          Path -> Alias:
#### A masked pattern was here ####
          Path -> Partition:
@@ -560,22 +576,22 @@ STAGE PLANS:
                ds 2008-04-08
              properties:
                COLUMN_STATS_ACCURATE true
-               bucket_count 4
+               bucket_count 2
                bucket_field_name key
                columns key,value
                columns.comments
                columns.types string:string
#### A masked pattern was here ####
-               name default.bucket_small
-               numFiles 4
+               name default.bucket_big
+               numFiles 2
                numRows 0
                partition_columns ds
                partition_columns.types string
                rawDataSize 0
-               serialization.ddl struct bucket_small { string key, string value}
+               serialization.ddl struct bucket_big { string key, string value}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-               totalSize 226
+               totalSize 2750
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -583,22 +599,22 @@ STAGE PLANS:
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                SORTBUCKETCOLSPREFIX TRUE
-               bucket_count 4
+               bucket_count 2
                bucket_field_name key
                columns key,value
                columns.comments
                columns.types string:string
#### A masked pattern was here ####
-               name default.bucket_small
+               name default.bucket_big
                partition_columns ds
                partition_columns.types string
-               serialization.ddl struct bucket_small { string key, string value}
+               serialization.ddl struct bucket_big { string key, string value}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-             name: default.bucket_small
-           name: default.bucket_small
+             name: default.bucket_big
+           name: default.bucket_big
#### A masked pattern was here ####
            Partition
              base file name: ds=2008-04-09
@@ -608,22 +624,22 @@ STAGE PLANS:
                ds 2008-04-09
              properties:
                COLUMN_STATS_ACCURATE true
-               bucket_count 4
+               bucket_count 2
                bucket_field_name key
                columns key,value
                columns.comments
                columns.types string:string
#### A masked pattern was here ####
-               name default.bucket_small
-               numFiles 4
+               name default.bucket_big
+               numFiles 2
                numRows 0
                partition_columns ds
                partition_columns.types string
                rawDataSize 0
-               serialization.ddl struct bucket_small { string key, string value}
+               serialization.ddl struct bucket_big { string key, string value}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-               totalSize 226
+               totalSize 2750
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -631,62 +647,42 @@ STAGE PLANS:
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                SORTBUCKETCOLSPREFIX TRUE
-               bucket_count 4
+               bucket_count 2
                bucket_field_name key
                columns key,value
                columns.comments
                columns.types string:string
#### A masked pattern was here ####
-               name default.bucket_small
+               name default.bucket_big
                partition_columns ds
                partition_columns.types string
-               serialization.ddl struct bucket_small { string key, string value}
+               serialization.ddl struct bucket_big { string key, string value}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-             name: default.bucket_small
-           name: default.bucket_small
+             name: default.bucket_big
+           name: default.bucket_big
          Truncated Path -> Alias:
-           /bucket_small/ds=2008-04-08 [b]
-           /bucket_small/ds=2008-04-09 [b]
-       Map 2
+           /bucket_big/ds=2008-04-08 [a]
+           /bucket_big/ds=2008-04-09 [a]
+       Map 3
          Map Operator Tree:
              TableScan
-               alias: a
-               Statistics: Num rows: 54 Data size: 5500 Basic stats: COMPLETE Column stats: NONE
+               alias: b
+               Statistics: Num rows: 4 Data size: 452 Basic stats: COMPLETE Column stats: NONE
                GatherStats: false
                Filter Operator
                  isSamplingPred: false
                  predicate: key is not null (type: boolean)
-                 Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE
-                 Map Join Operator
-                   condition map:
-                        Inner Join 0 to 1
-                   condition expressions:
-                     0
-                     1
-                   Estimated key counts: Map 1 => 2
-                   keys:
-                     0 key (type: string)
-                     1 key (type: string)
-                   input vertices:
-                     1 Map 1
-                   Position of Big Table: 0
-                   Statistics: Num rows: 29 Data size: 3025 Basic stats: COMPLETE Column stats: NONE
-                   Select Operator
-                     Statistics: Num rows: 29 Data size: 3025 Basic stats: COMPLETE Column stats: NONE
-                     Group By Operator
-                       aggregations: count()
-                       mode: hash
-                       outputColumnNames: _col0
-                       Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                       Reduce Output Operator
-                         sort order:
-                         Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                         tag: -1
-                         value expressions: _col0 (type: bigint)
-                         auto parallelism: false
+                 Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE
+                 Reduce Output Operator
+                   key expressions: key (type: string)
+                   sort order: +
+                   Map-reduce partition columns: key (type: string)
+                   Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE
+                   tag: 1
+                   auto parallelism: true
          Path -> Alias:
#### A masked pattern was here ####
          Path -> Partition:
@@ -699,22 +695,22 @@ STAGE PLANS:
                ds 2008-04-08
              properties:
                COLUMN_STATS_ACCURATE true
-               bucket_count 2
+               bucket_count 4
                bucket_field_name key
                columns key,value
                columns.comments
                columns.types string:string
#### A masked pattern was here ####
-               name default.bucket_big
-               numFiles 2
+               name default.bucket_small
+               numFiles 4
                numRows 0
                partition_columns ds
                partition_columns.types string
                rawDataSize 0
-               serialization.ddl struct bucket_big { string key, string value}
+               serialization.ddl struct bucket_small { string key, string value}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-               totalSize 2750
+               totalSize 226
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -722,22 +718,22 @@ STAGE PLANS:
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                SORTBUCKETCOLSPREFIX TRUE
-               bucket_count 2
+               bucket_count 4
                bucket_field_name key
                columns key,value
                columns.comments
                columns.types string:string
#### A masked pattern was here ####
-               name default.bucket_big
+               name default.bucket_small
                partition_columns ds
                partition_columns.types string
-               serialization.ddl struct bucket_big { string key, string value}
+               serialization.ddl struct bucket_small { string key, string value}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-             name: default.bucket_big
-           name: default.bucket_big
+             name: default.bucket_small
+           name: default.bucket_small
#### A masked pattern was here ####
            Partition
              base file name: ds=2008-04-09
@@ -747,22 +743,22 @@ STAGE PLANS:
                ds 2008-04-09
              properties:
                COLUMN_STATS_ACCURATE true
-               bucket_count 2
+               bucket_count 4
                bucket_field_name key
                columns key,value
                columns.comments
                columns.types string:string
#### A masked pattern was here ####
-               name default.bucket_big
-               numFiles 2
+               name default.bucket_small
+               numFiles 4
                numRows 0
                partition_columns ds
                partition_columns.types string
                rawDataSize 0
-               serialization.ddl struct bucket_big { string key, string value}
+               serialization.ddl struct bucket_small { string key, string value}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-               totalSize 2750
+               totalSize 226
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -770,26 +766,26 @@ STAGE PLANS:
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                SORTBUCKETCOLSPREFIX TRUE
-               bucket_count 2
+               bucket_count 4
                bucket_field_name key
                columns key,value
                columns.comments
                columns.types string:string
#### A masked pattern was here ####
-               name default.bucket_big
+               name default.bucket_small
                partition_columns ds
                partition_columns.types string
-               serialization.ddl struct bucket_big { string key, string value}
+               serialization.ddl struct bucket_small { string key, string value}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-             name: default.bucket_big
-           name: default.bucket_big
+             name: default.bucket_small
+           name: default.bucket_small
          Truncated Path -> Alias:
-           /bucket_big/ds=2008-04-08 [a]
-           /bucket_big/ds=2008-04-09 [a]
-       Reducer 3
+           /bucket_small/ds=2008-04-08 [b]
+           /bucket_small/ds=2008-04-09 [b]
+       Reducer 2
            Needs Tagging: false
            Reduce Operator Tree:
              Group By Operator
@@ -892,27 +888,45 @@ STAGE PLANS:
  Stage: Stage-1
    Tez
      Edges:
-       Map 2 <- Map 1 (BROADCAST_EDGE)
-       Reducer 3 <- Map 2 (SIMPLE_EDGE)
+       Map 1 <- Map 3 (BROADCAST_EDGE)
+       Reducer 2 <- Map 1 (SIMPLE_EDGE)
#### A masked pattern was here ####
      Vertices:
        Map 1
          Map Operator Tree:
              TableScan
-               alias: b
-               Statistics: Num rows: 4 Data size: 452 Basic stats: COMPLETE Column stats: NONE
+               alias: a
+               Statistics: Num rows: 54 Data size: 5500 Basic stats: COMPLETE Column stats: NONE
                GatherStats: false
                Filter Operator
                  isSamplingPred: false
                  predicate: key is not null (type: boolean)
-                 Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE
-                 Reduce Output Operator
-                   key expressions: key (type: string)
-                   sort order: +
-                   Map-reduce partition columns: key (type: string)
-                   Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE
-                   tag: 1
-                   auto parallelism: true
+                 Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE
+                 Map Join Operator
+                   condition map:
+                        Inner Join 0 to 1
+                   condition expressions:
+                     0
+                     1
+                   Estimated key counts: Map 3 => 2
+                   keys:
+                     0 key (type: string)
+                     1 key (type: string)
+                   input vertices:
+                     1 Map 3
+                   Position of Big Table: 0
+                   Statistics: Num rows: 29 Data size: 3025 Basic stats: COMPLETE Column stats: NONE
+                   Group By Operator
+                     aggregations: count()
+                     mode: hash
+                     outputColumnNames: _col0
+                     Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                     Reduce Output Operator
+                       sort order:
+                       Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                       tag: -1
+                       value expressions: _col0 (type: bigint)
+                       auto parallelism: false
          Path -> Alias:
#### A masked pattern was here ####
          Path -> Partition:
@@ -925,22 +939,22 @@ STAGE PLANS:
                ds 2008-04-08
              properties:
                COLUMN_STATS_ACCURATE true
-               bucket_count 4
+               bucket_count 2
                bucket_field_name key
                columns key,value
                columns.comments
                columns.types string:string
#### A masked pattern was here ####
-               name default.bucket_small
-               numFiles 4
+               name default.bucket_big
+               numFiles 2
                numRows 0
                partition_columns ds
                partition_columns.types string
                rawDataSize 0
-               serialization.ddl struct bucket_small { string key, string value}
+               serialization.ddl struct bucket_big { string key, string value}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-               totalSize 226
+               totalSize 2750
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -948,22 +962,22 @@ STAGE PLANS:
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                SORTBUCKETCOLSPREFIX TRUE
-               bucket_count 4
+               bucket_count 2
                bucket_field_name key
                columns key,value
                columns.comments
                columns.types string:string
#### A masked pattern was here ####
-               name default.bucket_small
+               name default.bucket_big
                partition_columns ds
                partition_columns.types string
-               serialization.ddl struct bucket_small { string key, string value}
+               serialization.ddl struct bucket_big { string key, string value}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-             name: default.bucket_small
-           name: default.bucket_small
+             name: default.bucket_big
+           name: default.bucket_big
#### A masked pattern was here ####
            Partition
              base file name: ds=2008-04-09
@@ -973,22 +987,22 @@ STAGE PLANS:
                ds 2008-04-09
              properties:
                COLUMN_STATS_ACCURATE true
-               bucket_count 4
+               bucket_count 2
                bucket_field_name key
                columns key,value
                columns.comments
                columns.types string:string
#### A masked pattern was here ####
-               name default.bucket_small
-               numFiles 4
+               name default.bucket_big
+               numFiles 2
                numRows 0
                partition_columns ds
                partition_columns.types string
                rawDataSize 0
-               serialization.ddl struct bucket_small { string key, string value}
+               serialization.ddl struct bucket_big { string key, string value}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-               totalSize 226
+               totalSize 2750
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -996,62 +1010,42 @@ STAGE PLANS:
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                SORTBUCKETCOLSPREFIX TRUE
-               bucket_count 4
+               bucket_count 2
                bucket_field_name key
                columns key,value
                columns.comments
                columns.types string:string
#### A masked pattern was here ####
-               name default.bucket_small
+               name default.bucket_big
                partition_columns ds
                partition_columns.types string
-               serialization.ddl struct bucket_small { string key, string value}
+               serialization.ddl struct bucket_big { string key, string value}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-             name: default.bucket_small
-           name: default.bucket_small
+             name: default.bucket_big
+           name: default.bucket_big
          Truncated Path -> Alias:
-           /bucket_small/ds=2008-04-08 [b]
-           /bucket_small/ds=2008-04-09 [b]
-       Map 2
+           /bucket_big/ds=2008-04-08 [a]
+           /bucket_big/ds=2008-04-09 [a]
+       Map 3
          Map Operator Tree:
              TableScan
-               alias: a
-               Statistics: Num rows: 54 Data size: 5500 Basic stats: COMPLETE Column stats: NONE
+               alias: b
+               Statistics: Num rows: 4 Data size: 452 Basic stats: COMPLETE Column stats: NONE
                GatherStats: false
                Filter Operator
                  isSamplingPred: false
                  predicate: key is not null (type: boolean)
-                 Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE
-                 Map Join Operator
-                   condition map:
-                        Inner Join 0 to 1
-                   condition expressions:
-                     0
-                     1
-                   Estimated key counts: Map 1 => 2
-                   keys:
-                     0 key (type: string)
-                     1 key (type: string)
-                   input vertices:
-                     1 Map 1
-                   Position of Big Table: 0
-                   Statistics: Num rows: 29 Data size: 3025 Basic stats: COMPLETE Column stats: NONE
-                   Select Operator
-                     Statistics: Num rows: 29 Data size: 3025 Basic stats: COMPLETE Column stats: NONE
-                     Group By Operator
-                       aggregations: count()
-                       mode: hash
-                       outputColumnNames: _col0
-                       Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                       Reduce Output Operator
-                         sort order:
-                         Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                         tag: -1
-                         value expressions: _col0 (type: bigint)
-                         auto parallelism: false
+                 Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE
+                 Reduce Output Operator
+                   key expressions: key (type: string)
+                   sort order: +
+                   Map-reduce partition columns: key (type: string)
+                   Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE
+                   tag: 1
+                   auto parallelism: true
          Path -> Alias:
#### A masked pattern was here ####
          Path -> Partition:
@@ -1064,22 +1058,22 @@ STAGE PLANS:
                ds 2008-04-08
              properties:
                COLUMN_STATS_ACCURATE true
-               bucket_count 2
+               bucket_count 4
                bucket_field_name key
                columns key,value
                columns.comments
                columns.types string:string
#### A masked pattern was here ####
-               name default.bucket_big
-               numFiles 2
+               name default.bucket_small
+               numFiles 4
                numRows 0
                partition_columns ds
                partition_columns.types string
                rawDataSize 0
-               serialization.ddl struct bucket_big { string key, string value}
+               serialization.ddl struct bucket_small { string key, string value}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-               totalSize 2750
+               totalSize 226
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -1087,22 +1081,22 @@ STAGE PLANS:
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                SORTBUCKETCOLSPREFIX TRUE
-               bucket_count 2
+               bucket_count 4
                bucket_field_name key
                columns key,value
                columns.comments
                columns.types string:string
#### A masked pattern was here ####
-               name default.bucket_big
+               name default.bucket_small
                partition_columns ds
                partition_columns.types string
-               serialization.ddl struct bucket_big { string key, string value}
+               serialization.ddl struct bucket_small { string key, string value}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-             name: default.bucket_big
-           name: default.bucket_big
+             name: default.bucket_small
+           name: default.bucket_small
#### A masked pattern was here ####
            Partition
              base file name: ds=2008-04-09
@@ -1112,22 +1106,22 @@ STAGE PLANS:
                ds 2008-04-09
              properties:
                COLUMN_STATS_ACCURATE true
-               bucket_count 2
+               bucket_count 4
                bucket_field_name key
                columns key,value
                columns.comments
                columns.types string:string
#### A masked pattern was here ####
-               name default.bucket_big
-               numFiles 2
+               name default.bucket_small
+               numFiles 4
                numRows 0
                partition_columns ds
                partition_columns.types string
                rawDataSize 0
-               serialization.ddl struct bucket_big { string key, string value}
+               serialization.ddl struct bucket_small { string key, string value}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-               totalSize 2750
+               totalSize 226
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -1135,26 +1129,26 @@ STAGE PLANS:
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                SORTBUCKETCOLSPREFIX TRUE
-               bucket_count 2
+               bucket_count 4
                bucket_field_name key
                columns key,value
                columns.comments
                columns.types string:string
#### A masked pattern was here ####
-               name default.bucket_big
+               name default.bucket_small
                partition_columns ds
                partition_columns.types string
-               serialization.ddl struct bucket_big { string key, string value}
+               serialization.ddl struct bucket_small { string key, string value}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-             name: default.bucket_big
-           name: default.bucket_big
+             name: default.bucket_small
+           name: default.bucket_small
          Truncated Path -> Alias:
-           /bucket_big/ds=2008-04-08 [a]
-           /bucket_big/ds=2008-04-09 [a]
-       Reducer 3
+           /bucket_small/ds=2008-04-08 [b]
+           /bucket_small/ds=2008-04-09 [b]
+       Reducer 2
            Needs Tagging: false
            Reduce Operator Tree:
              Group By Operator
diff --git a/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_8.q.out b/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_8.q.out
index a2d8e0f..506bc7f 100644
--- a/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_8.q.out
+++ b/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_8.q.out
@@ -162,47 +162,27 @@ STAGE PLANS:
  Stage: Stage-1
    Tez
      Edges:
-       Map 1 <- Map 3 (BROADCAST_EDGE)
-       Reducer 2 <- Map 1 (SIMPLE_EDGE)
+       Map 2 <- Map 1 (BROADCAST_EDGE)
+       Reducer 3 <- Map 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
      Vertices:
        Map 1
          Map Operator Tree:
              TableScan
-               alias: b
-               Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+               alias: a
+               Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE
                GatherStats: false
                Filter Operator
                  isSamplingPred: false
                  predicate: key is not null (type: boolean)
-                 Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
-                 Map Join Operator
-                   condition map:
-                        Inner Join 0 to 1
-                   condition expressions:
-                     0
-                     1
-                   Estimated key counts: Map 3 => 1
-                   keys:
-                     0 key (type: string)
-                     1 key (type: string)
-                   input vertices:
-                     0 Map 3
-                   Position of Big Table: 1
-                   Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE
-                   Select Operator
-                     Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE
-                     Group By Operator
-                       aggregations: count()
-                       mode: hash
-                       outputColumnNames: _col0
-                       Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                       Reduce Output Operator
-                         sort order:
-                         Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                         tag: -1
-                         value expressions: _col0 (type: bigint)
-                         auto parallelism: false
+                 Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+                 Reduce Output Operator
+                   key expressions: key (type: string)
+                   sort order: +
+                   Map-reduce partition columns: key (type: string)
+                   Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+                   tag: 0
+                   auto parallelism: true
          Path -> Alias:
#### A masked pattern was here ####
          Path -> Partition:
@@ -215,22 +195,22 @@ STAGE PLANS:
                ds 2008-04-08
              properties:
                COLUMN_STATS_ACCURATE true
-               bucket_count 4
+               bucket_count 2
                bucket_field_name key
                columns key,value
                columns.comments
                columns.types string:string
#### A masked pattern was here ####
-               name default.bucket_big
-               numFiles 4
+               name default.bucket_small
+               numFiles 2
                numRows 0
                partition_columns ds
                partition_columns.types string
                rawDataSize 0
-               serialization.ddl struct bucket_big { string key, string value}
+               serialization.ddl struct bucket_small { string key, string value}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-               totalSize 5812
+               totalSize 114
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -238,22 +218,22 @@ STAGE PLANS:
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                SORTBUCKETCOLSPREFIX TRUE
-               bucket_count 4
+               bucket_count 2
                bucket_field_name key
                columns key,value
                columns.comments
                columns.types string:string
#### A masked pattern was here ####
-               name default.bucket_big
+               name default.bucket_small
                partition_columns ds
                partition_columns.types string
-               serialization.ddl struct bucket_big { string key, string value}
+               serialization.ddl struct bucket_small { string key, string value}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-             name: default.bucket_big
-           name: default.bucket_big
+             name: default.bucket_small
+           name: default.bucket_small
#### A masked pattern was here ####
            Partition
              base file name: ds=2008-04-09
@@ -263,22 +243,22 @@ STAGE PLANS:
                ds 2008-04-09
              properties:
                COLUMN_STATS_ACCURATE true
-               bucket_count 4
+               bucket_count 2
                bucket_field_name key
                columns key,value
                columns.comments
                columns.types string:string
#### A masked pattern was here ####
-               name default.bucket_big
-               numFiles 4
+               name default.bucket_small
+               numFiles 2
                numRows 0
                partition_columns ds
                partition_columns.types string
                rawDataSize 0
-               serialization.ddl struct bucket_big { string key, string value}
+               serialization.ddl struct bucket_small { string key, string value}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-               totalSize 5812
+               totalSize 114
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -286,42 +266,60 @@ STAGE PLANS:
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                SORTBUCKETCOLSPREFIX TRUE
-               bucket_count 4
+               bucket_count 2
                bucket_field_name key
                columns key,value
                columns.comments
                columns.types string:string
#### A masked pattern was here ####
-               name default.bucket_big
+               name default.bucket_small
                partition_columns ds
                partition_columns.types string
-               serialization.ddl struct bucket_big { string key, string value}
+               serialization.ddl struct bucket_small { string key, string value}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-             name: default.bucket_big
-           name: default.bucket_big
+             name: default.bucket_small
+           name: default.bucket_small
          Truncated Path -> Alias:
-           /bucket_big/ds=2008-04-08 [b]
-           /bucket_big/ds=2008-04-09 [b]
-       Map 3
+           /bucket_small/ds=2008-04-08 [a]
+           /bucket_small/ds=2008-04-09 [a]
+       Map 2
          Map Operator Tree:
              TableScan
-               alias: a
-               Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE
+               alias: b
+               Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
                GatherStats: false
                Filter Operator
                  isSamplingPred: false
                  predicate: key is not null (type: boolean)
-                 Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
-                 Reduce Output Operator
-                   key expressions: key (type: string)
-                   sort order: +
-                   Map-reduce partition columns: key (type: string)
-                   Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
-                   tag: 0
-                   auto parallelism: true
+                 Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+                 Map Join Operator
+                   condition map:
+                        Inner Join 0 to 1
+                   condition expressions:
+                     0
+                     1
+                   Estimated key counts: Map 1 => 1
+                   keys:
+                     0 key (type: string)
+                     1 key (type: string)
+                   input vertices:
+                     0 Map 1
+                   Position of Big Table: 1
+                   Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE
+                   Group By Operator
+                     aggregations: count()
+                     mode: hash
+                     outputColumnNames: _col0
+                     Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                     Reduce Output Operator
+                       sort order:
+                       Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                       tag: -1
+                       value expressions: _col0 (type: bigint)
+                       auto parallelism: false
          Path -> Alias:
#### A masked pattern was here ####
          Path -> Partition:
@@ -334,22 +332,22 @@ STAGE PLANS:
                ds 2008-04-08
              properties:
                COLUMN_STATS_ACCURATE true
-               bucket_count 2
+               bucket_count 4
                bucket_field_name key
                columns key,value
                columns.comments
                columns.types string:string
#### A masked pattern was here ####
-               name default.bucket_small
-               numFiles 2
+               name default.bucket_big
+               numFiles 4
                numRows 0
                partition_columns ds
                partition_columns.types string
                rawDataSize 0
-               serialization.ddl struct bucket_small { string key, string value}
+               serialization.ddl struct bucket_big { string key, string value}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-               totalSize 114
+               totalSize 5812
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -357,22 +355,22 @@ STAGE PLANS:
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                SORTBUCKETCOLSPREFIX TRUE
-               bucket_count 2
+               bucket_count 4
                bucket_field_name key
                columns key,value
                columns.comments
                columns.types string:string
#### A masked pattern was here ####
-               name default.bucket_small
+               name default.bucket_big
                partition_columns ds
                partition_columns.types string
-               serialization.ddl struct bucket_small { string key, string value}
+               serialization.ddl struct bucket_big { string key, string value}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-             name: default.bucket_small
-           name: default.bucket_small
+             name: default.bucket_big
+           name: default.bucket_big
#### A masked pattern was here ####
            Partition
              base file name: ds=2008-04-09
@@ -382,22 +380,22 @@ STAGE PLANS:
                ds 2008-04-09
              properties:
                COLUMN_STATS_ACCURATE true
-               bucket_count 2
+               bucket_count 4
                bucket_field_name key
                columns key,value
                columns.comments
                columns.types string:string
#### A masked pattern was here ####
-               name default.bucket_small
-               numFiles 2
+               name default.bucket_big
+               numFiles 4
                numRows 0
                partition_columns ds
                partition_columns.types string
                rawDataSize 0
-               serialization.ddl struct bucket_small { string key, string value}
+               serialization.ddl struct bucket_big { string key, string value}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-               totalSize 114
+               totalSize 5812
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -405,26 +403,26 @@ STAGE PLANS:
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                SORTBUCKETCOLSPREFIX TRUE
-               bucket_count 2
+               bucket_count 4
                bucket_field_name key
                columns key,value
                columns.comments
                columns.types string:string
#### A masked pattern was here ####
-               name default.bucket_small
+               name default.bucket_big
                partition_columns ds
                partition_columns.types string
-               serialization.ddl struct bucket_small { string key, string value}
+               serialization.ddl struct bucket_big { string key, string value}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-             name: default.bucket_small
-           name: default.bucket_small
+             name: default.bucket_big
+           name: default.bucket_big
          Truncated Path -> Alias:
-           /bucket_small/ds=2008-04-08 [a]
-           /bucket_small/ds=2008-04-09 [a]
-       Reducer 2
+           /bucket_big/ds=2008-04-08 [b]
+           /bucket_big/ds=2008-04-09 [b]
+       Reducer 3
            Needs Tagging: false
            Reduce Operator Tree:
              Group By Operator
@@ -527,27 +525,45 @@ STAGE PLANS:
  Stage: Stage-1
    Tez
      Edges:
-       Map 2 <- Map 1 (BROADCAST_EDGE)
-       Reducer 3 <- Map 2 (SIMPLE_EDGE)
+       Map 1 <- Map 3 (BROADCAST_EDGE)
+       Reducer 2 <- Map 1 (SIMPLE_EDGE)
#### A masked pattern was here ####
      Vertices:
        Map 1
          Map Operator Tree:
              TableScan
-               alias: b
-               Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE
+               alias: a
+               Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
                GatherStats: false
                Filter Operator
                  isSamplingPred: false
                  predicate: key is not null (type: boolean)
-                 Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
-                 Reduce Output Operator
-                   key expressions: key (type: string)
-                   sort order: +
-                   Map-reduce partition columns: key (type: string)
-                   Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
-                   tag: 1
-                   auto parallelism: true
+                 Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+                 Map Join Operator
+                   condition map:
+                        Inner Join 0 to 1
+                   condition expressions:
+                     0
+                     1
+                   Estimated key counts: Map 3 => 1
+                   keys:
+                     0 key (type: string)
+                     1 key (type: string)
+                   input vertices:
+                     1 Map 3
+                   Position of Big Table: 0
+                   Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE
+                   Group By Operator
+                     aggregations: count()
+                     mode: hash
+                     outputColumnNames: _col0
+                     Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                     Reduce Output Operator
+                       sort order:
+                       Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                       tag: -1
+                       value expressions: _col0 (type: bigint)
+                       auto parallelism: false
          Path -> Alias:
#### A masked pattern was here ####
          Path -> Partition:
@@ -560,22 +576,22 @@ STAGE PLANS:
                ds 2008-04-08
              properties:
                COLUMN_STATS_ACCURATE true
-               bucket_count 2
+               bucket_count 4
                bucket_field_name key
                columns key,value
                columns.comments
                columns.types string:string
#### A masked pattern was here ####
-               name default.bucket_small
-               numFiles 2
+               name default.bucket_big
+               numFiles 4
                numRows 0
                partition_columns ds
                partition_columns.types string
                rawDataSize 0
-               serialization.ddl struct bucket_small { string key, string value}
+               serialization.ddl struct bucket_big { string key, string value}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-               totalSize 114
+               totalSize 5812
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -583,22 +599,22 @@ STAGE PLANS:
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                SORTBUCKETCOLSPREFIX TRUE
-               bucket_count 2
+               bucket_count 4
                bucket_field_name key
                columns key,value
                columns.comments
                columns.types string:string
#### A masked pattern was here ####
-               name default.bucket_small
+               name default.bucket_big
                partition_columns ds
                partition_columns.types string
-               serialization.ddl struct bucket_small { string key, string value}
+               serialization.ddl struct bucket_big { string key, string value}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-             name: default.bucket_small
-           name: default.bucket_small
+             name: default.bucket_big
+           name: default.bucket_big
#### A masked pattern was here ####
            Partition
              base file name: ds=2008-04-09
@@ -608,22 +624,22 @@ STAGE PLANS:
                ds 2008-04-09
              properties:
                COLUMN_STATS_ACCURATE true
-
bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small - numFiles 2 + name default.bucket_big + numFiles 4 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 114 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -631,62 +647,42 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small + name default.bucket_big partition_columns ds partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small + name: default.bucket_big + name: default.bucket_big Truncated Path -> Alias: - /bucket_small/ds=2008-04-08 [b] - /bucket_small/ds=2008-04-09 [b] - Map 2 + /bucket_big/ds=2008-04-08 [a] + /bucket_big/ds=2008-04-09 [a] + Map 3 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Estimated key counts: Map 1 => 1 - keys: - 0 key (type: string) - 1 key (type: string) - input vertices: - 1 Map 1 - Position of Big Table: 0 - Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -699,22 +695,22 @@ STAGE PLANS: ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true - bucket_count 4 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big - numFiles 4 + name 
default.bucket_small + numFiles 2 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -722,22 +718,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big + name default.bucket_small partition_columns ds partition_columns.types string - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_big - name: default.bucket_big + name: default.bucket_small + name: default.bucket_small #### A masked pattern was here #### Partition base file name: ds=2008-04-09 @@ -747,22 +743,22 @@ STAGE PLANS: ds 2008-04-09 properties: COLUMN_STATS_ACCURATE true - bucket_count 4 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big - numFiles 4 + name default.bucket_small + numFiles 2 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -770,26 +766,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big + name default.bucket_small partition_columns ds partition_columns.types string - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_big - name: default.bucket_big + name: default.bucket_small + name: default.bucket_small Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [a] - /bucket_big/ds=2008-04-09 [a] - Reducer 3 + /bucket_small/ds=2008-04-08 [b] + /bucket_small/ds=2008-04-09 [b] + Reducer 2 Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -894,27 +890,45 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 2 Data size: 228 Basic 
stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: true + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Estimated key counts: Map 3 => 1 + keys: + 0 key (type: string) + 1 key (type: string) + input vertices: + 1 Map 3 + Position of Big Table: 0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -927,22 +941,22 @@ STAGE PLANS: ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small - numFiles 2 + name default.bucket_big + numFiles 4 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 114 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -950,22 +964,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small + name default.bucket_big partition_columns ds partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small + name: default.bucket_big + name: default.bucket_big #### A masked pattern was here #### Partition base file name: ds=2008-04-09 @@ -975,22 +989,22 @@ STAGE PLANS: ds 2008-04-09 properties: COLUMN_STATS_ACCURATE true - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small - numFiles 2 + name default.bucket_big + numFiles 4 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct 
bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 114 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -998,62 +1012,42 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_small + name default.bucket_big partition_columns ds partition_columns.types string - serialization.ddl struct bucket_small { string key, string value} + serialization.ddl struct bucket_big { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small + name: default.bucket_big + name: default.bucket_big Truncated Path -> Alias: - /bucket_small/ds=2008-04-08 [b] - /bucket_small/ds=2008-04-09 [b] - Map 2 + /bucket_big/ds=2008-04-08 [a] + /bucket_big/ds=2008-04-09 [a] + Map 3 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Estimated key counts: Map 1 => 1 - keys: - 0 key (type: string) - 1 key (type: string) - input vertices: - 1 Map 1 - Position of Big Table: 0 - Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1066,22 +1060,22 @@ STAGE PLANS: ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true - bucket_count 4 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big - numFiles 4 + name default.bucket_small + numFiles 2 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 114 #### A masked pattern was here #### 
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -1089,22 +1083,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big + name default.bucket_small partition_columns ds partition_columns.types string - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_big - name: default.bucket_big + name: default.bucket_small + name: default.bucket_small #### A masked pattern was here #### Partition base file name: ds=2008-04-09 @@ -1114,22 +1108,22 @@ STAGE PLANS: ds 2008-04-09 properties: COLUMN_STATS_ACCURATE true - bucket_count 4 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big - numFiles 4 + name default.bucket_small + numFiles 2 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 114 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -1137,26 +1131,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.bucket_big + name default.bucket_small partition_columns ds partition_columns.types string - serialization.ddl struct bucket_big { string key, string value} + serialization.ddl struct bucket_small { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_big - name: default.bucket_big + name: default.bucket_small + name: default.bucket_small Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [a] - /bucket_big/ds=2008-04-09 [a] - Reducer 3 + /bucket_small/ds=2008-04-08 [b] + /bucket_small/ds=2008-04-09 [b] + Reducer 2 Needs Tagging: false Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_9.q.out b/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_9.q.out index fb44d5f..98e4083 100644 --- a/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_9.q.out +++ b/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_9.q.out @@ -86,17 +86,15 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + 
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Map 3 Map Operator Tree: TableScan @@ -199,22 +197,18 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Map 3 Map Operator Tree: TableScan @@ -308,27 +302,14 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Map 1 <- Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Map 2 - Map Operator Tree: - TableScan alias: a Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -345,25 +326,34 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0 input vertices: - 1 Map 1 + 1 Map 4 Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Reducer 3 + value expressions: _col1 (type: bigint) + Map 4 + Map 
Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -382,7 +372,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 4 + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -478,28 +468,15 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) Map 4 <- Map 6 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE), Reducer 5 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (BROADCAST_EDGE) Reducer 5 <- Map 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Map 2 - Map Operator Tree: - TableScan alias: a Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -516,24 +493,33 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0 input vertices: - 1 Map 1 + 1 Map 3 Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE Map 4 Map Operator Tree: TableScan @@ -555,22 +541,18 @@ STAGE PLANS: input vertices: 1 Map 6 Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Select 
Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Map 6 Map Operator Tree: TableScan @@ -584,42 +566,38 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: int) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Reducer 3 + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} - 1 {_col1} - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 1 Reducer 5 - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {_col1} + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 1 Reducer 5 + Statistics: Num rows: 5 Data size: 41 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5 Data size: 41 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 41 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Reduce Operator Tree: Group By Operator @@ -627,17 +605,13 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 15 Basic 
stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Stage: Stage-0 Fetch Operator @@ -743,17 +717,15 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Map 3 Map Operator Tree: TableScan @@ -851,13 +823,30 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((key < 8) and (key < 6)) and key is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Map 2 + Map Operator Tree: + TableScan alias: b Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -873,37 +862,18 @@ STAGE PLANS: 0 _col0 (type: int) 1 key (type: int) input vertices: - 0 Map 3 + 0 Map 1 Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Map 3 
- Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((key < 8) and (key < 6)) and key is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + value expressions: _col0 (type: bigint) + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1007,8 +977,8 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1023,12 +993,28 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + input vertices: + 1 Map 3 Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Map 2 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Map 3 Map Operator Tree: TableScan alias: a @@ -1040,30 +1026,12 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1185,17 +1153,15 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator 
+ sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Map 3 Map Operator Tree: TableScan @@ -1311,17 +1277,15 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Map 3 Map Operator Tree: TableScan @@ -1446,17 +1410,15 @@ STAGE PLANS: input vertices: 0 Map 1 Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -1562,17 +1524,15 @@ STAGE PLANS: input vertices: 1 Map 1 Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -1647,8 +1607,8 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1663,23 +1623,6 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column 
stats: NONE - Map 2 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -1693,20 +1636,35 @@ STAGE PLANS: 1 _col0 (type: int) 2 _col0 (type: int) input vertices: - 1 Map 4 - 2 Map 1 + 1 Map 3 + 2 Map 4 Statistics: Num rows: 6 Data size: 46 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 6 Data size: 46 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Map 4 Map Operator Tree: TableScan @@ -1724,7 +1682,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Reducer 3 + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1812,13 +1770,30 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((key < 8) and (key < 6)) and key is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Map 2 + Map Operator Tree: + TableScan alias: b Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -1834,37 +1809,18 @@ STAGE PLANS: 0 _col0 (type: int) 1 key (type: int) input vertices: - 0 Map 3 + 0 Map 1 Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column 
stats: NONE - Select Operator - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((key < 8) and (key < 6)) and key is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + value expressions: _col0 (type: bigint) + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1964,17 +1920,15 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Map 3 Map Operator Tree: TableScan @@ -2077,22 +2031,18 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Map 3 Map Operator Tree: TableScan @@ -2186,27 +2136,14 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Map 1 <- Map 4 
(BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Map 2 - Map Operator Tree: - TableScan alias: a Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -2223,25 +2160,34 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0 input vertices: - 1 Map 1 + 1 Map 4 Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Reducer 3 + value expressions: _col1 (type: bigint) + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -2260,7 +2206,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 4 + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -2356,28 +2302,15 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) Map 4 <- Map 6 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE), Reducer 5 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (BROADCAST_EDGE) Reducer 5 <- Map 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Map 2 - Map 
Operator Tree: - TableScan alias: a Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -2394,24 +2327,33 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0 input vertices: - 1 Map 1 + 1 Map 3 Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE Map 4 Map Operator Tree: TableScan @@ -2433,22 +2375,18 @@ STAGE PLANS: input vertices: 1 Map 6 Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Map 6 Map Operator Tree: TableScan @@ -2462,42 +2400,38 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: int) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Reducer 3 + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} - 1 {_col1} - keys: - 0 _col0 
(type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 1 Reducer 5 - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {_col1} + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 1 Reducer 5 + Statistics: Num rows: 5 Data size: 41 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5 Data size: 41 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 41 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Reduce Operator Tree: Group By Operator @@ -2505,17 +2439,13 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Stage: Stage-0 Fetch Operator @@ -2621,17 +2551,15 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Map 3 Map Operator Tree: 
TableScan @@ -2729,13 +2657,30 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((key < 8) and (key < 6)) and key is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Map 2 + Map Operator Tree: + TableScan alias: b Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -2751,37 +2696,18 @@ STAGE PLANS: 0 _col0 (type: int) 1 key (type: int) input vertices: - 0 Map 3 + 0 Map 1 Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((key < 8) and (key < 6)) and key is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + value expressions: _col0 (type: bigint) + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -2885,8 +2811,8 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2901,12 +2827,28 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + input vertices: + 1 Map 3 Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Map 2 + Group By Operator + aggregations: count() + mode: 
hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Map 3 Map Operator Tree: TableScan alias: a @@ -2918,30 +2860,12 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -3063,17 +2987,15 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Map 3 Map Operator Tree: TableScan @@ -3198,17 +3120,15 @@ STAGE PLANS: input vertices: 0 Map 1 Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -3314,17 +3234,15 @@ STAGE PLANS: input vertices: 1 Map 1 Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -3399,8 +3317,8 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -3415,23 +3333,6 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Map 2 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -3445,20 +3346,35 @@ STAGE PLANS: 1 _col0 (type: int) 2 _col0 (type: int) input vertices: - 1 Map 4 - 2 Map 1 + 1 Map 3 + 2 Map 4 Statistics: Num rows: 6 Data size: 46 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 6 Data size: 46 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Map 4 Map Operator Tree: TableScan @@ -3476,7 +3392,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Reducer 3 + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -3564,13 +3480,30 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- 
Map 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((key < 8) and (key < 6)) and key is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Map 2 + Map Operator Tree: + TableScan alias: b Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -3586,37 +3519,18 @@ STAGE PLANS: 0 _col0 (type: int) 1 key (type: int) input vertices: - 0 Map 3 + 0 Map 1 Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((key < 8) and (key < 6)) and key is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + value expressions: _col0 (type: bigint) + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/tez/bucket_map_join_tez1.q.out b/ql/src/test/results/clientpositive/tez/bucket_map_join_tez1.q.out index 512349d..50b4383 100644 --- a/ql/src/test/results/clientpositive/tez/bucket_map_join_tez1.q.out +++ b/ql/src/test/results/clientpositive/tez/bucket_map_join_tez1.q.out @@ -124,12 +124,26 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 1 <- Map 2 (CUSTOM_EDGE) + Map 2 <- Map 1 (CUSTOM_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 2 + Map Operator Tree: 
+ TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -146,7 +160,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1, _col7 input vertices: - 0 Map 2 + 0 Map 1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) @@ -159,20 +173,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Map 2 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) Stage: Stage-0 Fetch Operator @@ -202,13 +202,33 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 1 <- Reducer 3 (CUSTOM_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Map 3 <- Reducer 2 (CUSTOM_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: srcbucket_mapjoin + Statistics: Num rows: 27 Data size: 2808 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(substr(value, 5)) + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) + Map 3 + Map Operator Tree: + TableScan alias: b Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -225,7 +245,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1, _col3 input vertices: - 0 Reducer 3 + 0 Reducer 2 Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: int), _col0 (type: double), _col3 (type: string) @@ -238,31 +258,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Map 2 - Map Operator Tree: - TableScan - alias: srcbucket_mapjoin - Statistics: Num rows: 27 Data size: 2808 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(substr(value, 5)) - keys: key (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1456 
Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) - Reducer 3 + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -305,27 +301,13 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 2 <- Map 4 (CUSTOM_EDGE) - Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 2 (SIMPLE_EDGE) + Map 1 <- Map 3 (CUSTOM_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 2 - Map Operator Tree: - TableScan alias: tab_part Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -342,25 +324,21 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col6, _col7 input vertices: - 1 Map 4 + 1 Map 3 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col6 (type: int), _col7 (type: string) - outputColumnNames: _col6, _col7 + Group By Operator + aggregations: sum(substr(_col7, 5)) + keys: _col6 (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(substr(_col7, 5)) - keys: _col6 (type: int) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) - Map 4 + value expressions: _col1 (type: double) + Map 3 Map Operator Tree: TableScan alias: tab @@ -374,7 +352,21 @@ STAGE PLANS: Map-reduce partition columns: key (type: int) Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) - Reducer 3 + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -397,7 +389,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1, _col3 input vertices: - 1 Map 1 + 1 Map 4 
Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: int), _col0 (type: double), _col3 (type: string) @@ -435,46 +427,14 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 1 <- Reducer 3 (CUSTOM_EDGE) - Map 2 <- Map 4 (CUSTOM_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Map 1 <- Map 3 (CUSTOM_EDGE) + Map 4 <- Reducer 2 (CUSTOM_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} - 1 {value} - keys: - 0 _col1 (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 0 Reducer 3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int), _col0 (type: double), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Map 2 - Map Operator Tree: - TableScan alias: x Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -491,25 +451,21 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1 input vertices: - 1 Map 4 + 1 Map 3 Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string) + Group By Operator + aggregations: sum(substr(_col1, 5)) + keys: _col0 (type: int) + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(substr(_col1, 5)) - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) - Map 4 + value expressions: _col1 (type: double) + Map 3 Map Operator Tree: TableScan alias: y @@ -522,7 +478,39 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: int) Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Reducer 3 + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {value} 
+ keys: + 0 _col1 (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 0 Reducer 2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col0 (type: double), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -565,39 +553,12 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 3 <- Map 1 (CUSTOM_EDGE), Map 2 (CUSTOM_EDGE) + Map 1 <- Map 2 (CUSTOM_EDGE), Map 3 (CUSTOM_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 2 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Map 3 - Map Operator Tree: - TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -617,8 +578,8 @@ STAGE PLANS: 2 key (type: int) outputColumnNames: _col0, _col1, _col7 input vertices: - 1 Map 1 - 2 Map 2 + 1 Map 2 + 2 Map 3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) @@ -631,6 +592,33 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map 2 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 121 Data size: 1283 Basic 
stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -654,26 +642,12 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 2 <- Map 1 (CUSTOM_EDGE), Map 3 (CUSTOM_EDGE) + Map 1 <- Map 2 (CUSTOM_EDGE), Map 3 (CUSTOM_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 2 - Map Operator Tree: - TableScan alias: x Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -690,37 +664,33 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1 input vertices: - 1 Map 3 + 1 Map 2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} - 1 {value} - keys: - 0 _col0 (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 1 Map 1 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {value} + keys: + 0 _col0 (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 1 Map 3 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Map 3 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map 2 Map Operator Tree: TableScan alias: y @@ -733,6 +703,20 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: int) Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + Map 3 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) 
+ Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Stage: Stage-0 Fetch Operator @@ -760,13 +744,33 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 1 <- Reducer 3 (CUSTOM_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Map 3 <- Reducer 2 (CUSTOM_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: srcbucket_mapjoin + Statistics: Num rows: 27 Data size: 2808 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(substr(value, 5)) + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) + Map 3 + Map Operator Tree: + TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -783,7 +787,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1, _col3 input vertices: - 0 Reducer 3 + 0 Reducer 2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: double), _col3 (type: string) @@ -796,48 +800,20 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Map 2 - Map Operator Tree: - TableScan - alias: srcbucket_mapjoin - Statistics: Num rows: 27 Data size: 2808 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(substr(value, 5)) - keys: key (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) - Reducer 3 + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 728 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 728 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition 
columns: _col0 (type: int) - Statistics: Num rows: 7 Data size: 728 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) + Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) Stage: Stage-0 Fetch Operator @@ -863,13 +839,27 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 1 <- Reducer 3 (CUSTOM_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Map 3 <- Reducer 2 (CUSTOM_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: srcbucket_mapjoin + Statistics: Num rows: 27 Data size: 2808 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + value expressions: substr(value, 5) (type: string) + Map 3 + Map Operator Tree: + TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -886,7 +876,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1, _col3 input vertices: - 0 Reducer 3 + 0 Reducer 2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: double), _col3 (type: string) @@ -899,42 +889,20 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Map 2 - Map Operator Tree: - TableScan - alias: srcbucket_mapjoin - Statistics: Num rows: 27 Data size: 2808 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE - value expressions: substr(value, 5) (type: string) - Reducer 3 + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: int) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 728 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 728 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 7 Data size: 728 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) + Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key 
expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) Stage: Stage-0 Fetch Operator @@ -960,12 +928,26 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 1 <- Map 2 (BROADCAST_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int) + Map 2 + Map Operator Tree: + TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -982,7 +964,7 @@ STAGE PLANS: 1 value (type: string) outputColumnNames: _col0, _col1, _col7 input vertices: - 0 Map 2 + 0 Map 1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) @@ -995,20 +977,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Map 2 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: int) Stage: Stage-0 Fetch Operator @@ -1054,12 +1022,26 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 1 <- Map 2 (CUSTOM_EDGE) + Map 2 <- Map 1 (CUSTOM_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 2 + Map Operator Tree: + TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -1076,7 +1058,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1, _col6 input vertices: - 0 Map 2 + 0 Map 1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) @@ -1089,20 +1071,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Map 2 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) Stage: Stage-0 Fetch Operator @@ -1122,23 +1090,23 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 2 <- Map 1 (BROADCAST_EDGE), Map 3 (CUSTOM_EDGE) + Map 2 <- Map 1 (CUSTOM_EDGE), Map 3 (BROADCAST_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: value (type: string) + key expressions: key (type: int) sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: int) + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Map 2 Map Operator Tree: TableScan @@ -1158,7 +1126,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1 input vertices: - 0 Map 3 + 0 Map 1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -1171,7 +1139,7 @@ STAGE PLANS: 1 value (type: string) outputColumnNames: _col0, _col12 input vertices: - 1 Map 1 + 1 Map 3 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col12 (type: int) @@ -1187,17 +1155,17 @@ STAGE PLANS: Map 3 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + predicate: value is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: key (type: int) + key expressions: value (type: string) sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/tez/bucket_map_join_tez2.q.out b/ql/src/test/results/clientpositive/tez/bucket_map_join_tez2.q.out index 240decc..116d9e5 100644 --- a/ql/src/test/results/clientpositive/tez/bucket_map_join_tez2.q.out +++ 
b/ql/src/test/results/clientpositive/tez/bucket_map_join_tez2.q.out @@ -120,23 +120,23 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 2 <- Map 1 (BROADCAST_EDGE), Map 3 (CUSTOM_EDGE) + Map 2 <- Map 1 (CUSTOM_EDGE), Map 3 (BROADCAST_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: value (type: string) + key expressions: key (type: int) sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: int) + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Map 2 Map Operator Tree: TableScan @@ -156,7 +156,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1 input vertices: - 0 Map 3 + 0 Map 1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -169,7 +169,7 @@ STAGE PLANS: 1 value (type: string) outputColumnNames: _col0, _col12 input vertices: - 1 Map 1 + 1 Map 3 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col12 (type: int) @@ -185,17 +185,17 @@ STAGE PLANS: Map 3 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + predicate: value is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: key (type: int) + key expressions: value (type: string) sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int) Stage: Stage-0 Fetch Operator @@ -241,12 +241,26 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 1 <- Map 2 (BROADCAST_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(key) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(key) (type: double) + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Map 2 + Map Operator Tree: + TableScan alias: b Statistics: Num rows: 500 Data 
size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -263,7 +277,7 @@ STAGE PLANS: 1 UDFToDouble(key) (type: double) outputColumnNames: _col0, _col1, _col6 input vertices: - 0 Map 2 + 0 Map 1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) @@ -276,20 +290,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Map 2 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: UDFToDouble(key) is not null (type: boolean) - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(key) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(key) (type: double) - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: int), value (type: string) Stage: Stage-0 Fetch Operator @@ -540,80 +540,71 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 1 <- Reducer 3 (CUSTOM_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: b + alias: tab Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} - 1 {key} - keys: - 0 _col0 (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 0 Reducer 3 - Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Map 2 + Group By Operator + keys: key (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + Map 3 Map Operator Tree: TableScan - alias: tab + alias: b Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: key + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Group By 
Operator - keys: key (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Reducer 3 + Reducer 2 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 60 Data size: 636 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 60 Data size: 636 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 60 Data size: 636 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 {key} + keys: + 0 _col0 (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -633,81 +624,72 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 1 <- Reducer 3 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (BROADCAST_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: b + alias: tab Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: UDFToDouble(key) is not null (type: boolean) + predicate: UDFToDouble(value) is not null (type: boolean) Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} - 1 {value} - keys: - 0 UDFToDouble(_col0) (type: double) - 1 UDFToDouble(key) (type: double) - outputColumnNames: _col0, _col2 - input vertices: - 0 Reducer 3 - Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Map 2 + Group By Operator + keys: value (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + Reduce 
Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + Map 3 Map Operator Tree: TableScan - alias: tab + alias: b Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: UDFToDouble(value) is not null (type: boolean) + predicate: UDFToDouble(key) is not null (type: boolean) Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string) - outputColumnNames: value + Reduce Output Operator + key expressions: UDFToDouble(key) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(key) (type: double) Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: value (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Reducer 3 + value expressions: value (type: string) + Reducer 2 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 60 Data size: 636 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 60 Data size: 636 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(_col0) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(_col0) (type: double) - Statistics: Num rows: 60 Data size: 636 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 {value} + keys: + 0 UDFToDouble(_col0) (type: double) + 1 UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col2 + input vertices: + 1 Map 3 + Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/tez/cbo_union.q.out b/ql/src/test/results/clientpositive/tez/cbo_union.q.out index 4084a15..eb02b03 100644 --- a/ql/src/test/results/clientpositive/tez/cbo_union.q.out +++ b/ql/src/test/results/clientpositive/tez/cbo_union.q.out @@ -25,13 +25,13 @@ POSTHOOK: Input: default@cbo_t2@dt=2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 +1 1 1 1.0 true 2014 +1 1 1 1.0 true 2014 +1 1 1 1.0 true 2014 +1 1 1 1.0 true 2014 +1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 -2 2 2 2.0 true 2014 -2 2 2 2.0 true 
2014 -2 2 2 2.0 true 2014 -2 2 2 2.0 true 2014 -2 2 2 2.0 true 2014 null null NULL NULL NULL 2014 null null NULL NULL NULL 2014 1 1 1 1.0 true 2014 @@ -45,13 +45,13 @@ null null NULL NULL NULL 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 -1 1 1 1.0 true 2014 -1 1 1 1.0 true 2014 -1 1 1 1.0 true 2014 -1 1 1 1.0 true 2014 -1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 +2 2 2 2.0 true 2014 +2 2 2 2.0 true 2014 +2 2 2 2.0 true 2014 +2 2 2 2.0 true 2014 +2 2 2 2.0 true 2014 null null NULL NULL NULL 2014 null null NULL NULL NULL 2014 PREHOOK: query: select key from (select key, c_int from (select * from cbo_t1 union all select * from cbo_t2 where cbo_t2.key >=0)r1 union all select key, c_int from cbo_t3)r2 where key >=0 order by key diff --git a/ql/src/test/results/clientpositive/tez/correlationoptimizer1.q.out b/ql/src/test/results/clientpositive/tez/correlationoptimizer1.q.out index 5631926..245bf4a 100644 --- a/ql/src/test/results/clientpositive/tez/correlationoptimizer1.q.out +++ b/ql/src/test/results/clientpositive/tez/correlationoptimizer1.q.out @@ -69,22 +69,18 @@ STAGE PLANS: 1 outputColumnNames: _col0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -93,19 +89,15 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col1)) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col1)) - mode: hash - outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint) + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 4 Reduce Operator Tree: Group By Operator @@ -209,22 +201,18 @@ STAGE PLANS: 1 outputColumnNames: _col0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + keys: 
_col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -233,19 +221,15 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col1)) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col1)) - mode: hash - outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint) + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 4 Reduce Operator Tree: Group By Operator @@ -355,22 +339,18 @@ STAGE PLANS: input vertices: 0 Map 1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -379,19 +359,15 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col1)) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col1)) - mode: hash - 
outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint) + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 4 Reduce Operator Tree: Group By Operator @@ -510,22 +486,18 @@ STAGE PLANS: 1 outputColumnNames: _col0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -534,19 +506,15 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col1)) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col1)) - mode: hash - outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint) + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 4 Reduce Operator Tree: Group By Operator @@ -659,22 +627,18 @@ STAGE PLANS: 1 outputColumnNames: _col0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort 
order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -683,19 +647,15 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col1)) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col1)) - mode: hash - outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint) + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 4 Reduce Operator Tree: Group By Operator @@ -799,22 +759,18 @@ STAGE PLANS: 1 outputColumnNames: _col0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -823,19 +779,15 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col1)) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col1)) - mode: hash - outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint) + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 4 Reduce Operator Tree: Group By Operator @@ -933,22 +885,18 @@ STAGE 
PLANS: 1 outputColumnNames: _col0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -957,19 +905,15 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col1)) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col1)) - mode: hash - outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint) + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 4 Reduce Operator Tree: Group By Operator @@ -1073,22 +1017,18 @@ STAGE PLANS: 1 {KEY.reducesinkkey0} outputColumnNames: _col5 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col5 (type: string) - outputColumnNames: _col5 + Group By Operator + aggregations: count(1) + keys: _col5 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col5 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -1097,19 +1037,15 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) + Group 
By Operator + aggregations: sum(hash(_col0)), sum(hash(_col1)) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col1)) - mode: hash - outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint) + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 4 Reduce Operator Tree: Group By Operator @@ -1207,22 +1143,18 @@ STAGE PLANS: 1 {KEY.reducesinkkey0} outputColumnNames: _col5 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col5 (type: string) - outputColumnNames: _col5 + Group By Operator + aggregations: count(1) + keys: _col5 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col5 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -1231,19 +1163,15 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col1)) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col1)) - mode: hash - outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint) + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 4 Reduce Operator Tree: Group By Operator @@ -1317,23 +1245,23 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: y - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string), value (type: string) sort order: ++ Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 
25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Map 4 Map Operator Tree: TableScan - alias: x - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: y + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string), value (type: string) sort order: ++ Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Merge Join Operator @@ -1344,22 +1272,18 @@ STAGE PLANS: 1 {KEY.reducesinkkey1} outputColumnNames: _col0, _col6 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col6 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string), _col6 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string), _col6 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: bigint) + value expressions: _col2 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -1441,23 +1365,23 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: y - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string), value (type: string) sort order: ++ Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Map 4 Map Operator Tree: TableScan - alias: x - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: y + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string), value (type: string) sort order: ++ Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Merge Join Operator @@ -1468,22 +1392,18 @@ STAGE PLANS: 1 {KEY.reducesinkkey1} outputColumnNames: _col0, _col6 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col6 + Group By Operator + aggregations: count(1) + keys: _col0 
(type: string), _col6 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string), _col6 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: bigint) + value expressions: _col2 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -1601,22 +1521,18 @@ STAGE PLANS: 1 {KEY.reducesinkkey0} outputColumnNames: _col5 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col5 (type: string) - outputColumnNames: _col5 + Group By Operator + aggregations: count(1) + keys: _col5 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col5 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -1625,19 +1541,15 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col1)) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col1)) - mode: hash - outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint) + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 4 Reduce Operator Tree: Group By Operator @@ -1735,22 +1647,18 @@ STAGE PLANS: 1 {KEY.reducesinkkey0} outputColumnNames: _col5 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col5 (type: string) - outputColumnNames: _col5 + Group By Operator + aggregations: count(1) + keys: _col5 (type: string) + mode: hash + outputColumnNames: _col0, 
_col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col5 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -1759,19 +1667,15 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col1)) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col1)) - mode: hash - outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint) + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 4 Reduce Operator Tree: Group By Operator @@ -1875,22 +1779,18 @@ STAGE PLANS: 1 outputColumnNames: _col0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -1899,19 +1799,15 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col1)) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col1)) - mode: hash - outputColumnNames: _col0, _col1 + Statistics: Num 
rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint) + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 4 Reduce Operator Tree: Group By Operator @@ -2009,22 +1905,18 @@ STAGE PLANS: 1 outputColumnNames: _col0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -2033,19 +1925,15 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col1)) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col1)) - mode: hash - outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint) + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 4 Reduce Operator Tree: Group By Operator @@ -2151,22 +2039,18 @@ STAGE PLANS: 1 outputColumnNames: _col0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 
(type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -2175,19 +2059,15 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col1)) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col1)) - mode: hash - outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint) + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 4 Reduce Operator Tree: Group By Operator @@ -2285,22 +2165,18 @@ STAGE PLANS: 1 outputColumnNames: _col0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -2309,19 +2185,15 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col1)) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col1)) - mode: hash - outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint) + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 4 Reduce Operator Tree: Group By Operator @@ -2428,22 +2300,18 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1 
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: bigint) + value expressions: _col2 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -2452,19 +2320,15 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)) + mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint) + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint) Reducer 4 Reduce Operator Tree: Group By Operator @@ -2569,22 +2433,18 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 275 Data size: 2921 
Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: bigint) + value expressions: _col2 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -2593,19 +2453,15 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)) + mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint) + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint) Reducer 4 Reduce Operator Tree: Group By Operator @@ -2711,22 +2567,18 @@ STAGE PLANS: 1 outputColumnNames: _col0 Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -2735,19 +2587,15 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 68 Data size: 724 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col1)) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 68 Data size: 724 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col1)) - mode: hash - outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint) + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 4 Reduce Operator Tree: Group By Operator @@ -2851,22 
+2699,18 @@ STAGE PLANS: 1 outputColumnNames: _col0 Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -2875,19 +2719,15 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 68 Data size: 724 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col1)) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 68 Data size: 724 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col1)) - mode: hash - outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint) + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 4 Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/tez/cross_join.q.out b/ql/src/test/results/clientpositive/tez/cross_join.q.out index ad0c759..e40ca8c 100644 --- a/ql/src/test/results/clientpositive/tez/cross_join.q.out +++ b/ql/src/test/results/clientpositive/tez/cross_join.q.out @@ -19,20 +19,20 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: src2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) Map 3 Map Operator Tree: TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Reducer 2 Reduce Operator Tree: Merge Join Operator @@ -82,20 +82,20 @@ STAGE PLANS: Map 1 Map 
Operator Tree: TableScan - alias: src2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) Map 3 Map Operator Tree: TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Reducer 2 Reduce Operator Tree: Merge Join Operator @@ -144,7 +144,7 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: src2 + alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -157,7 +157,7 @@ STAGE PLANS: Map 3 Map Operator Tree: TableScan - alias: src + alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) diff --git a/ql/src/test/results/clientpositive/tez/cross_product_check_1.q.out b/ql/src/test/results/clientpositive/tez/cross_product_check_1.q.out index afc261a..1e6ee1f 100644 --- a/ql/src/test/results/clientpositive/tez/cross_product_check_1.q.out +++ b/ql/src/test/results/clientpositive/tez/cross_product_check_1.q.out @@ -43,20 +43,20 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: string), value (type: string) Map 3 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: string), value (type: string) Reducer 2 Reduce Operator Tree: @@ -251,35 +251,27 @@ STAGE PLANS: 1 outputColumnNames: _col0 Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) + Group By Operator + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0 Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key 
expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Reducer 4 Reduce Operator Tree: Merge Join Operator @@ -363,35 +355,27 @@ STAGE PLANS: 1 outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 114 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) + Group By Operator + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 114 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 11 Data size: 114 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 11 Data size: 114 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 51 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 51 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 5 Data size: 51 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + Statistics: Num rows: 11 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 11 Data size: 114 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Reducer 4 Reduce Operator Tree: Merge Join Operator @@ -420,7 +404,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[29][tables = [ss, od1]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[29][tables = [ss, od1]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select * from (select A.key from A group by key) ss join (select d1.key from B d1 join B d2 on d1.key = d2.key where 1 = 1 group by d1.key) od1 @@ -437,15 +421,34 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) - Reducer 7 <- Map 6 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) 
+ Reducer 6 <- Reducer 5 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan alias: d1 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -456,7 +459,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE - Map 5 + Map 7 Map Operator Tree: TableScan alias: d2 @@ -469,65 +472,18 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE - Map 6 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: key (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 - outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Reducer 4 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE + value expressions: _col0 (type: string) + Reducer 3 Reduce Operator Tree: Merge Join Operator condition map: @@ -536,33 +492,49 @@ STAGE PLANS: 0 {VALUE._col0} 1 {VALUE._col0} outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 + Reducer 5 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE + Reducer 6 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/tez/cross_product_check_2.q.out b/ql/src/test/results/clientpositive/tez/cross_product_check_2.q.out index 0e407c5..b1f7a91 100644 --- a/ql/src/test/results/clientpositive/tez/cross_product_check_2.q.out +++ b/ql/src/test/results/clientpositive/tez/cross_product_check_2.q.out @@ -24,7 +24,7 @@ POSTHOOK: type: CREATETABLE_AS_SELECT POSTHOOK: Input: default@src POSTHOOK: Output: database:default POSTHOOK: Output: default@B -Warning: Map Join MAPJOIN[7][bigTable=a] in task 'Map 2' is a cross product +Warning: Map Join MAPJOIN[7][bigTable=a] in task 'Map 1' is a cross product PREHOOK: query: explain select * from A join B PREHOOK: type: QUERY POSTHOOK: query: explain select * from A join B @@ -37,21 +37,12 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) + Map 1 <- Map 2 (BROADCAST_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 10 Data size: 104 
Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: string), value (type: string) - Map 2 - Map Operator Tree: - TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -65,7 +56,7 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) @@ -78,6 +69,15 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map 2 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Stage: Stage-0 Fetch Operator @@ -222,20 +222,16 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) + Group By Operator + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0 Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE Map 3 Map Operator Tree: TableScan @@ -284,15 +280,11 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Stage: Stage-0 Fetch Operator @@ -337,20 +329,16 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 11 Data size: 114 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) + Group By Operator + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 114 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: 
Num rows: 11 Data size: 114 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 11 Data size: 114 Basic stats: COMPLETE Column stats: NONE Map 3 Map Operator Tree: TableScan @@ -394,15 +382,11 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 51 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 51 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 5 Data size: 51 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + Statistics: Num rows: 11 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 11 Data size: 114 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Stage: Stage-0 Fetch Operator @@ -410,7 +394,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[29][bigTable=?] in task 'Reducer 5' is a cross product +Warning: Map Join MAPJOIN[29][bigTable=?] in task 'Reducer 2' is a cross product PREHOOK: query: explain select * from (select A.key from A group by key) ss join (select d1.key from B d1 join B d2 on d1.key = d2.key where 1 = 1 group by d1.key) od1 @@ -427,14 +411,33 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 5 <- Map 4 (SIMPLE_EDGE), Reducer 2 (BROADCAST_EDGE) + Map 3 <- Map 5 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (BROADCAST_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map 3 + Map Operator Tree: + TableScan alias: d1 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -451,23 +454,19 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0 input vertices: - 1 Map 3 + 1 Map 5 Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) + Group By Operator + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0 Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: 
string) - Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE - Map 3 + Map 5 Map Operator Tree: TableScan alias: d2 @@ -480,75 +479,48 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE - Map 4 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: key (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Reducer 5 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 {_col0} + keys: + 0 + 1 + outputColumnNames: _col0, _col1 + input vertices: + 1 Reducer 4 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} - 1 {_col0} - keys: - 0 - 1 - outputColumnNames: _col0, _col1 - input vertices: - 1 Reducer 2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - 
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/tez/ctas.q.out b/ql/src/test/results/clientpositive/tez/ctas.q.out index b90716e..abc54cb 100644 --- a/ql/src/test/results/clientpositive/tez/ctas.q.out +++ b/ql/src/test/results/clientpositive/tez/ctas.q.out @@ -1,10 +1,10 @@ -PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) create table nzhang_Tmp(a int, b string) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@nzhang_Tmp -POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S) +POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS( 0.20S) create table nzhang_Tmp(a int, b string) POSTHOOK: type: CREATETABLE diff --git a/ql/src/test/results/clientpositive/tez/dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/tez/dynamic_partition_pruning.q.out index 9a3315f..97c57f0 100644 --- a/ql/src/test/results/clientpositive/tez/dynamic_partition_pruning.q.out +++ b/ql/src/test/results/clientpositive/tez/dynamic_partition_pruning.q.out @@ -251,17 +251,15 @@ STAGE PLANS: 0 1 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -356,17 +354,15 @@ STAGE PLANS: 0 1 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -440,8 +436,8 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) - Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: @@ -459,19 +455,19 @@ STAGE PLANS: Map 5 Map Operator Tree: TableScan - alias: 
srcpart_hour - filterExpr: (hr is not null and (hour = 11)) (type: boolean) - Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE + alias: srcpart_date + filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean) + Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (hr is not null and (hour = 11)) (type: boolean) + predicate: (ds is not null and (date = '2008-04-08')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: hr (type: string) + key expressions: ds (type: string) sort order: + - Map-reduce partition columns: hr (type: string) + Map-reduce partition columns: ds (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: hr (type: string) + expressions: ds (type: string) outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator @@ -481,26 +477,26 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Dynamic Partitioning Event Operator Target Input: srcpart - Partition key expr: hr + Partition key expr: ds Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Target column: hr + Target column: ds Target Vertex: Map 1 Map 6 Map Operator Tree: TableScan - alias: srcpart_date - filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean) - Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE + alias: srcpart_hour + filterExpr: (hr is not null and (hour = 11)) (type: boolean) + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ds is not null and (date = '2008-04-08')) (type: boolean) + predicate: (hr is not null and (hour = 11)) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: ds (type: string) + key expressions: hr (type: string) sort order: + - Map-reduce partition columns: ds (type: string) + Map-reduce partition columns: hr (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: ds (type: string) + expressions: hr (type: string) outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator @@ -510,9 +506,9 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Dynamic Partitioning Event Operator Target Input: srcpart - Partition key expr: ds + Partition key expr: hr Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Target column: ds + Target column: hr Target Vertex: Map 1 Reducer 2 Reduce Operator Tree: @@ -538,17 +534,15 @@ STAGE PLANS: 0 1 Statistics: Num rows: 2420 Data size: 25709 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 2420 Data size: 25709 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Reducer 4 Reduce Operator Tree: Group By Operator @@ -611,8 +605,8 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) - Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: @@ -631,30 +625,30 @@ STAGE PLANS: Map 5 Map Operator Tree: TableScan - alias: srcpart_hour - filterExpr: (hr is not null and (hour = 11)) (type: boolean) - Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE + alias: srcpart_date + filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean) + Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (hr is not null and (hour = 11)) (type: boolean) + predicate: (ds is not null and (date = '2008-04-08')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: hr (type: string) + key expressions: ds (type: string) sort order: + - Map-reduce partition columns: hr (type: string) + Map-reduce partition columns: ds (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Map 6 Map Operator Tree: TableScan - alias: srcpart_date - filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean) - Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE + alias: srcpart_hour + filterExpr: (hr is not null and (hour = 11)) (type: boolean) + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ds is not null and (date = '2008-04-08')) (type: boolean) + predicate: (hr is not null and (hour = 11)) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: ds (type: string) + key expressions: hr (type: string) sort order: + - Map-reduce partition columns: ds (type: string) + Map-reduce partition columns: hr (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reducer 2 Reduce Operator Tree: @@ -680,17 +674,15 @@ STAGE PLANS: 0 1 Statistics: Num rows: 2420 Data size: 25709 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 2420 Data size: 25709 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Reducer 4 Reduce Operator Tree: Group By Operator @@ -831,17 +823,15 @@ STAGE PLANS: 0 1 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + 
aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -936,17 +926,15 @@ STAGE PLANS: 0 1 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -1069,17 +1057,15 @@ STAGE PLANS: 0 1 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -1174,17 +1160,15 @@ STAGE PLANS: 0 1 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -1308,17 +1292,15 @@ STAGE PLANS: 0 1 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output 
Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -1431,17 +1413,15 @@ STAGE PLANS: 0 1 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -1539,17 +1519,15 @@ STAGE PLANS: 0 1 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -1647,17 +1625,15 @@ STAGE PLANS: 0 1 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -1783,17 +1759,15 @@ STAGE PLANS: 0 1 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num 
rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -1865,15 +1839,35 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 5 <- Map 4 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: srcpart + filterExpr: (ds = '2008-04-08') (type: boolean) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: '2008-04-08' (type: string) + outputColumnNames: ds + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: ds (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: srcpart filterExpr: ds is not null (type: boolean) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -1895,28 +1889,20 @@ STAGE PLANS: Partition key expr: ds Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Target column: ds - Target Vertex: Map 4 - Map 4 - Map Operator Tree: - TableScan - alias: srcpart - filterExpr: (ds = '2008-04-08') (type: boolean) + Target Vertex: Map 1 + Reducer 2 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '2008-04-08' (type: string) - outputColumnNames: ds - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: ds (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Reducer 3 Reduce Operator Tree: Merge Join Operator condition map: @@ -1925,18 +1911,16 @@ STAGE PLANS: 0 1 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output 
Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 + value expressions: _col0 (type: bigint) + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1954,22 +1938,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -2364,16 +2332,6 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: srcpart - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: ds (type: string) - sort order: + - Map-reduce partition columns: ds (type: string) - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Map 4 - Map Operator Tree: - TableScan alias: srcpart_date filterExpr: (date = '2008-04-08') (type: boolean) Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE @@ -2399,7 +2357,17 @@ STAGE PLANS: Partition key expr: ds Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE Target column: ds - Target Vertex: Map 1 + Target Vertex: Map 4 + Map 4 + Map Operator Tree: + TableScan + alias: srcpart + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: ds (type: string) + sort order: + + Map-reduce partition columns: ds (type: string) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Merge Join Operator @@ -2409,17 +2377,15 @@ STAGE PLANS: 0 1 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -2549,8 +2515,8 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) - Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### 
Vertices: @@ -2568,18 +2534,19 @@ STAGE PLANS: Map 5 Map Operator Tree: TableScan - alias: srcpart_hour - filterExpr: ((hr is not null and (hour = 11)) and (hr = 11)) (type: boolean) - Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE + alias: srcpart_date + filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean) + Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((hr is not null and (hour = 11)) and (hr = 11)) (type: boolean) + predicate: (ds is not null and (date = '2008-04-08')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: '11' (type: string) + key expressions: ds (type: string) sort order: + + Map-reduce partition columns: ds (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: '11' (type: string) + expressions: ds (type: string) outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator @@ -2589,26 +2556,25 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Dynamic Partitioning Event Operator Target Input: srcpart - Partition key expr: hr + Partition key expr: ds Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Target column: hr + Target column: ds Target Vertex: Map 1 Map 6 Map Operator Tree: TableScan - alias: srcpart_date - filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean) - Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE + alias: srcpart_hour + filterExpr: ((hr is not null and (hour = 11)) and (hr = 11)) (type: boolean) + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ds is not null and (date = '2008-04-08')) (type: boolean) + predicate: ((hr is not null and (hour = 11)) and (hr = 11)) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: ds (type: string) + key expressions: '11' (type: string) sort order: + - Map-reduce partition columns: ds (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: ds (type: string) + expressions: '11' (type: string) outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator @@ -2618,9 +2584,9 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Dynamic Partitioning Event Operator Target Input: srcpart - Partition key expr: ds + Partition key expr: hr Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Target column: ds + Target column: hr Target Vertex: Map 1 Reducer 2 Reduce Operator Tree: @@ -2644,17 +2610,15 @@ STAGE PLANS: 0 1 Statistics: Num rows: 1210 Data size: 12854 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1210 Data size: 12854 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort 
order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Reducer 4 Reduce Operator Tree: Group By Operator @@ -2713,8 +2677,8 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) - Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: @@ -2722,19 +2686,6 @@ STAGE PLANS: Map 5 Map Operator Tree: TableScan - alias: srcpart_hour - filterExpr: (hr = 13) (type: boolean) - Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (hr = 13) (type: boolean) - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: '13' (type: string) - sort order: + - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE - Map 6 - Map Operator Tree: - TableScan alias: srcpart_date filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean) Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE @@ -2746,6 +2697,19 @@ STAGE PLANS: sort order: + Map-reduce partition columns: ds (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map 6 + Map Operator Tree: + TableScan + alias: srcpart_hour + filterExpr: (hr = 13) (type: boolean) + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (hr = 13) (type: boolean) + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '13' (type: string) + sort order: + + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Merge Join Operator @@ -2768,17 +2732,15 @@ STAGE PLANS: 0 1 Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Reducer 4 Reduce Operator Tree: Group By Operator @@ -2833,23 +2795,34 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 4 <- Map 6 (SIMPLE_EDGE), Union 3 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (SIMPLE_EDGE) - Reducer 8 <- Map 7 (SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 6 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE), Union 6 (CONTAINS) + Reducer 8 <- Map 7 (SIMPLE_EDGE), Union 6 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: srcpart + filterExpr: ds is not null (type: boolean) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column 
stats: NONE + Reduce Output Operator + key expressions: ds (type: string) + sort order: + + Map-reduce partition columns: ds (type: string) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: srcpart Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ds (type: string) outputColumnNames: ds Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: min(ds) + aggregations: max(ds) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE @@ -2857,17 +2830,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Map 6 - Map Operator Tree: - TableScan - alias: srcpart - filterExpr: ds is not null (type: boolean) - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: ds (type: string) - sort order: + - Map-reduce partition columns: ds (type: string) - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Map 7 Map Operator Tree: TableScan @@ -2878,7 +2840,7 @@ STAGE PLANS: outputColumnNames: ds Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: max(ds) + aggregations: min(ds) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE @@ -2888,40 +2850,6 @@ STAGE PLANS: value expressions: _col0 (type: string) Reducer 2 Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Filter Operator - predicate: _col0 is not null (type: boolean) - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Dynamic Partitioning Event Operator - Target Input: srcpart - Partition key expr: ds - Target column: ds - Target Vertex: Map 6 - Reducer 4 - Reduce Operator Tree: Merge Join Operator condition map: Left Semi Join 0 to 1 @@ -2929,18 +2857,16 @@ STAGE PLANS: 0 1 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 5 + value expressions: _col0 (type: bigint) + Reducer 3 Reduce Operator Tree: Group By Operator 
aggregations: count(VALUE._col0) @@ -2958,7 +2884,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 8 + Reducer 5 Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0) @@ -2966,9 +2892,14 @@ STAGE PLANS: outputColumnNames: _col0 Filter Operator predicate: _col0 is not null (type: boolean) - Select Operator - expressions: _col0 (type: string) + Group By Operator + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 @@ -2976,47 +2907,64 @@ STAGE PLANS: keys: _col0 (type: string) mode: hash outputColumnNames: _col0 - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Dynamic Partitioning Event Operator - Target Input: srcpart - Partition key expr: ds - Target column: ds - Target Vertex: Map 6 - Union 3 - Vertex: Union 3 - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select count(*) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart) -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: select count(*) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 -#### A masked pattern was here #### + Dynamic Partitioning Event Operator + Target Input: srcpart + Partition key expr: ds + Target column: ds + Target Vertex: Map 1 + Reducer 8 + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Filter Operator + predicate: _col0 is not null (type: boolean) + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Dynamic Partitioning Event Operator + Target Input: srcpart + Partition key expr: ds + Target column: ds + Target Vertex: Map 1 + Union 6 + Vertex: Union 6 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart) +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: 
Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### 2000 PREHOOK: query: EXPLAIN select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart) PREHOOK: type: QUERY @@ -3030,23 +2978,34 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 4 <- Map 6 (SIMPLE_EDGE), Union 3 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (SIMPLE_EDGE) - Reducer 8 <- Map 7 (SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 6 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE), Union 6 (CONTAINS) + Reducer 8 <- Map 7 (SIMPLE_EDGE), Union 6 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: srcpart + filterExpr: ds is not null (type: boolean) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: ds (type: string) + sort order: + + Map-reduce partition columns: ds (type: string) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: srcpart Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ds (type: string) outputColumnNames: ds Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: min(ds) + aggregations: max(ds) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE @@ -3054,17 +3013,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Map 6 - Map Operator Tree: - TableScan - alias: srcpart - filterExpr: ds is not null (type: boolean) - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: ds (type: string) - sort order: + - Map-reduce partition columns: ds (type: string) - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Map 7 Map Operator Tree: TableScan @@ -3075,7 +3023,7 @@ STAGE PLANS: outputColumnNames: ds Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: max(ds) + aggregations: min(ds) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE @@ -3085,40 +3033,6 @@ STAGE PLANS: value expressions: _col0 (type: string) Reducer 2 Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Filter Operator - predicate: _col0 is not null (type: boolean) - Select Operator - expressions: _col0 (type: string) - 
outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Dynamic Partitioning Event Operator - Target Input: srcpart - Partition key expr: ds - Target column: ds - Target Vertex: Map 6 - Reducer 4 - Reduce Operator Tree: Merge Join Operator condition map: Left Semi Join 0 to 1 @@ -3127,21 +3041,17 @@ STAGE PLANS: 1 outputColumnNames: _col2 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string) - outputColumnNames: _col2 + Group By Operator + keys: _col2 (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col2 (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Reducer 5 + Reducer 3 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) @@ -3159,7 +3069,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 8 + Reducer 5 Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0) @@ -3167,9 +3077,14 @@ STAGE PLANS: outputColumnNames: _col0 Filter Operator predicate: _col0 is not null (type: boolean) - Select Operator - expressions: _col0 (type: string) + Group By Operator + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 @@ -3177,24 +3092,41 @@ STAGE PLANS: keys: _col0 (type: string) mode: hash outputColumnNames: _col0 - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Dynamic Partitioning Event Operator - Target Input: srcpart - Partition key expr: ds - Target column: ds - Target Vertex: Map 6 - Union 3 - Vertex: Union 3 + Dynamic Partitioning Event Operator + Target Input: srcpart + Partition key expr: ds + Target column: ds + Target Vertex: Map 1 + Reducer 8 + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Filter Operator + predicate: _col0 is not null (type: boolean) + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + key 
expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Dynamic Partitioning Event Operator + Target Input: srcpart + Partition key expr: ds + Target column: ds + Target Vertex: Map 1 + Union 6 + Vertex: Union 6 Stage: Stage-0 Fetch Operator @@ -3232,31 +3164,29 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Reducer 11 <- Map 10 (SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 11 <- Map 10 (SIMPLE_EDGE), Union 9 (CONTAINS) Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 4 <- Union 3 (SIMPLE_EDGE), Union 7 (SIMPLE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE), Union 7 (CONTAINS) - Reducer 9 <- Map 8 (SIMPLE_EDGE), Union 7 (CONTAINS) + Reducer 4 <- Union 3 (SIMPLE_EDGE), Union 9 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 8 <- Map 7 (SIMPLE_EDGE), Union 9 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: srcpart + filterExpr: ds is not null (type: boolean) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ds (type: string) - outputColumnNames: ds + Group By Operator + keys: ds (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(ds) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Map 10 Map Operator Tree: TableScan @@ -3267,7 +3197,7 @@ STAGE PLANS: outputColumnNames: ds Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: max(ds) + aggregations: min(ds) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE @@ -3281,51 +3211,50 @@ STAGE PLANS: alias: srcpart filterExpr: ds is not null (type: boolean) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ds (type: string) - outputColumnNames: ds + Group By Operator + keys: ds (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: ds (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Map 8 + Map 7 Map Operator Tree: TableScan alias: srcpart - filterExpr: ds is not null (type: boolean) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE 
Select Operator expressions: ds (type: string) outputColumnNames: ds Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: ds (type: string) + aggregations: max(ds) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + sort order: + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Reducer 11 Reduce Operator Tree: Group By Operator - aggregations: max(VALUE._col0) + aggregations: min(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Filter Operator predicate: _col0 is not null (type: boolean) - Select Operator - expressions: _col0 (type: string) + Group By Operator + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 @@ -3333,45 +3262,11 @@ STAGE PLANS: keys: _col0 (type: string) mode: hash outputColumnNames: _col0 - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Dynamic Partitioning Event Operator - Target Input: srcpart - Partition key expr: ds - Target column: ds - Target Vertex: Map 5 - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Dynamic Partitioning Event Operator - Target Input: srcpart - Partition key expr: ds - Target column: ds - Target Vertex: Map 8 - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Filter Operator - predicate: _col0 is not null (type: boolean) - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Dynamic Partitioning Event Operator + Target Input: srcpart + Partition key expr: ds + Target column: ds + Target Vertex: Map 1 Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 @@ -3379,34 +3274,21 @@ STAGE PLANS: keys: _col0 (type: string) mode: hash outputColumnNames: _col0 - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Dynamic Partitioning Event Operator - Target Input: srcpart - Partition key expr: ds - Target column: ds - Target Vertex: Map 5 - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Dynamic Partitioning Event Operator - Target Input: srcpart - Partition key expr: ds - Target column: ds - Target Vertex: Map 8 + Dynamic Partitioning Event Operator + 
Target Input: srcpart + Partition key expr: ds + Target column: ds + Target Vertex: Map 5 + Reducer 2 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Reducer 4 Reduce Operator Tree: Merge Join Operator @@ -3434,30 +3316,54 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reducer 9 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reducer 8 Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string) + aggregations: max(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Filter Operator + predicate: _col0 is not null (type: boolean) + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Dynamic Partitioning Event Operator + Target Input: srcpart + Partition key expr: ds + Target column: ds + Target Vertex: Map 1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Dynamic Partitioning Event Operator + Target Input: srcpart + Partition key expr: ds + Target column: ds + Target Vertex: Map 5 Union 3 Vertex: Union 3 - Union 7 - Vertex: Union 7 + Union 9 + Vertex: Union 9 Stage: Stage-0 Fetch Operator @@ -3518,20 +3424,18 @@ STAGE PLANS: keys: 0 ds (type: string) 1 ds (type: string) - input vertices: - 1 Map 3 - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + input vertices: + 1 Map 3 + Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Map 3 Map Operator Tree: TableScan @@ -3654,7 +3558,7 @@ STAGE PLANS: 1 ds (type: string) outputColumnNames: _col3 input vertices: - 1 Map 4 + 1 Map 3 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -3666,35 +3570,33 @@ STAGE PLANS: 0 
_col3 (type: string) 1 hr (type: string) input vertices: - 1 Map 3 + 1 Map 4 Statistics: Num rows: 2420 Data size: 25709 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 2420 Data size: 25709 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Map 3 Map Operator Tree: TableScan - alias: srcpart_hour - filterExpr: (hr is not null and (hour = 11)) (type: boolean) - Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE + alias: srcpart_date + filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean) + Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (hr is not null and (hour = 11)) (type: boolean) + predicate: (ds is not null and (date = '2008-04-08')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: hr (type: string) + key expressions: ds (type: string) sort order: + - Map-reduce partition columns: hr (type: string) + Map-reduce partition columns: ds (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: hr (type: string) + expressions: ds (type: string) outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator @@ -3704,26 +3606,26 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Dynamic Partitioning Event Operator Target Input: srcpart - Partition key expr: hr + Partition key expr: ds Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Target column: hr + Target column: ds Target Vertex: Map 1 Map 4 Map Operator Tree: TableScan - alias: srcpart_date - filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean) - Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE + alias: srcpart_hour + filterExpr: (hr is not null and (hour = 11)) (type: boolean) + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ds is not null and (date = '2008-04-08')) (type: boolean) + predicate: (hr is not null and (hour = 11)) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: ds (type: string) + key expressions: hr (type: string) sort order: + - Map-reduce partition columns: ds (type: string) + Map-reduce partition columns: hr (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: ds (type: string) + expressions: hr (type: string) outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator @@ -3733,9 +3635,9 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Dynamic Partitioning Event Operator Target Input: srcpart - Partition key expr: ds + 
Partition key expr: hr Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Target column: ds + Target column: hr Target Vertex: Map 1 Reducer 2 Reduce Operator Tree: @@ -3831,17 +3733,15 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Map 3 Map Operator Tree: TableScan @@ -3977,17 +3877,15 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Map 3 Map Operator Tree: TableScan @@ -4087,17 +3985,15 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Map 3 Map Operator Tree: TableScan @@ -4208,17 +4104,15 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: 
_col0 (type: bigint) Map 3 Map Operator Tree: TableScan @@ -4319,7 +4213,7 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 1 <- Reducer 4 (BROADCAST_EDGE) + Map 3 <- Reducer 2 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 4 <- Map 3 (SIMPLE_EDGE) #### A masked pattern was here #### @@ -4328,6 +4222,26 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart + filterExpr: (ds = '2008-04-08') (type: boolean) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: '2008-04-08' (type: string) + outputColumnNames: ds + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: ds (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Map 3 + Map Operator Tree: + TableScan + alias: srcpart filterExpr: ds is not null (type: boolean) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -4340,42 +4254,47 @@ STAGE PLANS: 0 ds (type: string) 1 _col0 (type: string) input vertices: - 1 Reducer 4 + 1 Reducer 2 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Map 3 - Map Operator Tree: - TableScan - alias: srcpart - filterExpr: (ds = '2008-04-08') (type: boolean) - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '2008-04-08' (type: string) - outputColumnNames: ds - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: ds (type: string) + aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 
(type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Dynamic Partitioning Event Operator + Target Input: srcpart + Partition key expr: ds + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Target column: ds + Target Vertex: Map 3 + Reducer 4 + Reduce Operator Tree: + Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 @@ -4391,37 +4310,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Dynamic Partitioning Event Operator - Target Input: srcpart - Partition key expr: ds - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Target column: ds - Target Vertex: Map 1 Stage: Stage-0 Fetch Operator @@ -4557,23 +4445,13 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: srcpart - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: ds (type: string) - sort order: + - Map-reduce partition columns: ds (type: string) - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Map 2 - Map Operator Tree: - TableScan alias: srcpart_date filterExpr: (date = '2008-04-08') (type: boolean) Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE @@ -4590,20 +4468,28 @@ STAGE PLANS: 0 ds (type: string) 1 ds (type: string) input vertices: - 1 Map 1 + 1 Map 3 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 + value 
expressions: _col0 (type: bigint) + Map 3 + Map Operator Tree: + TableScan + alias: srcpart + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: ds (type: string) + sort order: + + Map-reduce partition columns: ds (type: string) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -4752,7 +4638,7 @@ STAGE PLANS: 0 ds (type: string) 1 ds (type: string) input vertices: - 1 Map 4 + 1 Map 3 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -4764,34 +4650,33 @@ STAGE PLANS: 0 '11' (type: string) 1 '11' (type: string) input vertices: - 1 Map 3 + 1 Map 4 Statistics: Num rows: 1210 Data size: 12854 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1210 Data size: 12854 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Map 3 Map Operator Tree: TableScan - alias: srcpart_hour - filterExpr: ((hr is not null and (hour = 11)) and (hr = 11)) (type: boolean) - Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE + alias: srcpart_date + filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean) + Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((hr is not null and (hour = 11)) and (hr = 11)) (type: boolean) + predicate: (ds is not null and (date = '2008-04-08')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: '11' (type: string) + key expressions: ds (type: string) sort order: + + Map-reduce partition columns: ds (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: '11' (type: string) + expressions: ds (type: string) outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator @@ -4801,26 +4686,25 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Dynamic Partitioning Event Operator Target Input: srcpart - Partition key expr: hr + Partition key expr: ds Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Target column: hr + Target column: ds Target Vertex: Map 1 Map 4 Map Operator Tree: TableScan - alias: srcpart_date - filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean) - Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE + alias: srcpart_hour + filterExpr: ((hr is not null and (hour = 11)) and (hr = 11)) (type: boolean) + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ds is not null and (date = '2008-04-08')) (type: boolean) + predicate: ((hr is not null and (hour = 11)) and (hr = 11)) 
(type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: ds (type: string) + key expressions: '11' (type: string) sort order: + - Map-reduce partition columns: ds (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: ds (type: string) + expressions: '11' (type: string) outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator @@ -4830,9 +4714,9 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Dynamic Partitioning Event Operator Target Input: srcpart - Partition key expr: ds + Partition key expr: hr Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Target column: ds + Target column: hr Target Vertex: Map 1 Reducer 2 Reduce Operator Tree: @@ -4892,15 +4776,29 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 1 <- Map 4 (BROADCAST_EDGE) - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Map 1 <- Map 2 (BROADCAST_EDGE) + Map 3 <- Map 1 (BROADCAST_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map 2 Map Operator Tree: TableScan + alias: srcpart_date + filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean) + Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ds is not null and (date = '2008-04-08')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: ds (type: string) + sort order: + + Map-reduce partition columns: ds (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map 3 + Map Operator Tree: + TableScan alias: srcpart_hour filterExpr: (hr = 13) (type: boolean) Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE @@ -4919,32 +4817,16 @@ STAGE PLANS: input vertices: 0 Map 1 Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Map 4 - Map Operator Tree: - TableScan - alias: srcpart_date - filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean) - Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (ds is not null and (date = '2008-04-08')) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: ds (type: string) - sort order: + - Map-reduce partition columns: ds (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reducer 3 + value expressions: _col0 (type: bigint) + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -4991,34 +4873,16 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 4 <- 
Union 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 5 <- Map 4 (SIMPLE_EDGE) - Reducer 7 <- Map 6 (SIMPLE_EDGE), Union 3 (CONTAINS) + Map 1 <- Union 5 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE), Union 5 (CONTAINS) + Reducer 7 <- Map 6 (SIMPLE_EDGE), Union 5 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: srcpart - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ds (type: string) - outputColumnNames: ds - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(ds) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Map 4 - Map Operator Tree: - TableScan - alias: srcpart filterExpr: ds is not null (type: boolean) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -5032,23 +4896,19 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col2 input vertices: - 1 Union 3 + 1 Union 5 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string) - outputColumnNames: _col2 + Group By Operator + keys: _col2 (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col2 (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Map 6 + Map 3 Map Operator Tree: TableScan alias: srcpart @@ -5066,41 +4926,25 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Filter Operator - predicate: _col0 is not null (type: boolean) + Map 6 + Map Operator Tree: + TableScan + alias: srcpart + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: string) + expressions: ds (type: string) + outputColumnNames: ds + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(ds) + mode: hash outputColumnNames: _col0 - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - 
Dynamic Partitioning Event Operator - Target Input: srcpart - Partition key expr: ds - Target column: ds - Target Vertex: Map 4 - Reducer 5 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reducer 2 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) @@ -5118,7 +4962,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0) @@ -5126,9 +4970,14 @@ STAGE PLANS: outputColumnNames: _col0 Filter Operator predicate: _col0 is not null (type: boolean) - Select Operator - expressions: _col0 (type: string) + Group By Operator + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 @@ -5136,24 +4985,41 @@ STAGE PLANS: keys: _col0 (type: string) mode: hash outputColumnNames: _col0 - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Dynamic Partitioning Event Operator - Target Input: srcpart - Partition key expr: ds - Target column: ds - Target Vertex: Map 4 - Union 3 - Vertex: Union 3 + Dynamic Partitioning Event Operator + Target Input: srcpart + Partition key expr: ds + Target column: ds + Target Vertex: Map 1 + Reducer 7 + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Filter Operator + predicate: _col0 is not null (type: boolean) + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Dynamic Partitioning Event Operator + Target Input: srcpart + Partition key expr: ds + Target column: ds + Target Vertex: Map 1 + Union 5 + Vertex: Union 5 Stage: Stage-0 Fetch Operator @@ -5253,17 +5119,15 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 1100 Data size: 103400 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1100 Data size: 103400 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Map 
3 Map Operator Tree: TableScan diff --git a/ql/src/test/results/clientpositive/tez/dynamic_partition_pruning_2.q.out b/ql/src/test/results/clientpositive/tez/dynamic_partition_pruning_2.q.out index ed94ee8..35504bb 100644 --- a/ql/src/test/results/clientpositive/tez/dynamic_partition_pruning_2.q.out +++ b/ql/src/test/results/clientpositive/tez/dynamic_partition_pruning_2.q.out @@ -155,44 +155,14 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Map 1 <- Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: d1 - filterExpr: (id is not null and (label) IN ('foo', 'bar')) (type: boolean) - Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (id is not null and (label) IN ('foo', 'bar')) (type: boolean) - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: id (type: int) - sort order: + - Map-reduce partition columns: id (type: int) - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE - value expressions: label (type: string) - Select Operator - expressions: id (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE - Dynamic Partitioning Event Operator - Target Input: agg - Partition key expr: dim_shops_id - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE - Target column: dim_shops_id - Target Vertex: Map 2 - Map 2 - Map Operator Tree: - TableScan alias: agg filterExpr: dim_shops_id is not null (type: boolean) Statistics: Num rows: 9 Data size: 27 Basic stats: COMPLETE Column stats: NONE @@ -207,7 +177,7 @@ STAGE PLANS: 1 id (type: int) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 1 Map 1 + 1 Map 4 Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((_col1 = _col5) and (_col6) IN ('foo', 'bar')) (type: boolean) @@ -228,32 +198,58 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: decimal(20,0)) - Reducer 3 + Map 4 + Map Operator Tree: + TableScan + alias: d1 + filterExpr: (id is not null and (label) IN ('foo', 'bar')) (type: boolean) + Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (id is not null and (label) IN ('foo', 'bar')) (type: boolean) + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: id (type: int) + sort order: + + Map-reduce partition columns: id (type: int) + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + value expressions: label (type: string) + Select Operator + expressions: id (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE 
+ Dynamic Partitioning Event Operator + Target Input: agg + Partition key expr: dim_shops_id + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Target column: dim_shops_id + Target Vertex: Map 1 + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), sum(VALUE._col1) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: decimal(20,0)) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint), _col2 (type: decimal(20,0)) - Reducer 4 + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: decimal(20,0)) + Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: decimal(20,0)) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -323,29 +319,14 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Map 1 <- Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: d1 - filterExpr: (id is not null and (label) IN ('foo', 'bar')) (type: boolean) - Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (id is not null and (label) IN ('foo', 'bar')) (type: boolean) - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: id (type: int) - sort order: + - Map-reduce partition columns: id (type: int) - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE - value expressions: label (type: string) - Map 2 - Map Operator Tree: - TableScan alias: agg filterExpr: dim_shops_id is not null (type: boolean) Statistics: Num rows: 9 Data size: 27 Basic stats: COMPLETE Column stats: NONE @@ -360,7 +341,7 @@ STAGE PLANS: 1 id (type: int) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 1 Map 1 + 1 Map 4 Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((_col1 = _col5) and (_col6) IN ('foo', 'bar')) (type: boolean) @@ -381,32 +362,43 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE 
Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: decimal(20,0)) - Reducer 3 + Map 4 + Map Operator Tree: + TableScan + alias: d1 + filterExpr: (id is not null and (label) IN ('foo', 'bar')) (type: boolean) + Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (id is not null and (label) IN ('foo', 'bar')) (type: boolean) + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: id (type: int) + sort order: + + Map-reduce partition columns: id (type: int) + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + value expressions: label (type: string) + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), sum(VALUE._col1) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: decimal(20,0)) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint), _col2 (type: decimal(20,0)) - Reducer 4 + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: decimal(20,0)) + Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: decimal(20,0)) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -468,27 +460,12 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) + Map 1 <- Map 2 (BROADCAST_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: d1 - filterExpr: id is not null (type: boolean) - Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: id is not null (type: boolean) - Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: id (type: int) - sort order: + - Map-reduce partition columns: id (type: int) - Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE - value expressions: label (type: string) - Map 2 - Map Operator Tree: - TableScan alias: agg filterExpr: dim_shops_id is not null (type: boolean) Statistics: Num rows: 9 Data size: 27 Basic stats: COMPLETE Column stats: NONE @@ -503,7 +480,7 @@ STAGE PLANS: 1 id (type: int) outputColumnNames: _col1, _col5, _col6 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 9 Data 
size: 29 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col1 = _col5) (type: boolean) @@ -519,6 +496,21 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map 2 + Map Operator Tree: + TableScan + alias: d1 + filterExpr: id is not null (type: boolean) + Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: id (type: int) + sort order: + + Map-reduce partition columns: id (type: int) + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE + value expressions: label (type: string) Stage: Stage-0 Fetch Operator @@ -577,25 +569,12 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) + Map 1 <- Map 2 (BROADCAST_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: d1 - filterExpr: (id = 1) (type: boolean) - Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (id = 1) (type: boolean) - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: 1 (type: int) - sort order: + - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE - Map 2 - Map Operator Tree: - TableScan alias: agg filterExpr: (dim_shops_id = 1) (type: boolean) Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE @@ -610,7 +589,7 @@ STAGE PLANS: 1 1 (type: int) outputColumnNames: _col0 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: decimal(10,0)) @@ -623,6 +602,19 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map 2 + Map Operator Tree: + TableScan + alias: d1 + filterExpr: (id = 1) (type: boolean) + Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (id = 1) (type: boolean) + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: 1 (type: int) + sort order: + + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -679,44 +671,14 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Map 1 <- Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: d1 - filterExpr: (id is not null and (label) IN ('foo', 'bar')) (type: boolean) - Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (id is not null and (label) IN ('foo', 'bar')) (type: boolean) - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: id (type: int) - sort order: + - Map-reduce partition columns: id (type: int) - Statistics: Num rows: 1 Data 
size: 5 Basic stats: COMPLETE Column stats: NONE - value expressions: label (type: string) - Select Operator - expressions: id (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE - Dynamic Partitioning Event Operator - Target Input: agg - Partition key expr: dim_shops_id - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE - Target column: dim_shops_id - Target Vertex: Map 2 - Map 2 - Map Operator Tree: - TableScan alias: agg filterExpr: dim_shops_id is not null (type: boolean) Statistics: Num rows: 9 Data size: 27 Basic stats: COMPLETE Column stats: NONE @@ -731,7 +693,7 @@ STAGE PLANS: 1 id (type: int) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 1 Map 1 + 1 Map 4 Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((_col1 = _col5) and (_col6) IN ('foo', 'bar')) (type: boolean) @@ -752,32 +714,58 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: decimal(20,0)) - Reducer 3 + Map 4 + Map Operator Tree: + TableScan + alias: d1 + filterExpr: (id is not null and (label) IN ('foo', 'bar')) (type: boolean) + Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (id is not null and (label) IN ('foo', 'bar')) (type: boolean) + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: id (type: int) + sort order: + + Map-reduce partition columns: id (type: int) + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + value expressions: label (type: string) + Select Operator + expressions: id (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Dynamic Partitioning Event Operator + Target Input: agg + Partition key expr: dim_shops_id + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Target column: dim_shops_id + Target Vertex: Map 1 + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), sum(VALUE._col1) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: decimal(20,0)) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint), _col2 (type: decimal(20,0)) - Reducer 4 + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 
(type: bigint), _col2 (type: decimal(20,0)) + Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: decimal(20,0)) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -839,40 +827,42 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 3 <- Map 1 (BROADCAST_EDGE), Union 4 (CONTAINS) - Map 5 <- Map 2 (BROADCAST_EDGE), Union 4 (CONTAINS) + Map 1 <- Map 3 (BROADCAST_EDGE), Union 2 (CONTAINS) + Map 4 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: dim_shops - filterExpr: (id is not null and (label = 'bar')) (type: boolean) - Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (id is not null and (label = 'bar')) (type: boolean) - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: id (type: int) - sort order: + - Map-reduce partition columns: id (type: int) - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: id (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash + alias: agg_01 + filterExpr: dim_shops_id is not null (type: boolean) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {amount} {dim_shops_id} + 1 {id} + keys: + 0 dim_shops_id (type: int) + 1 id (type: int) + outputColumnNames: _col0, _col1, _col5 + input vertices: + 1 Map 3 + Filter Operator + predicate: (_col1 = _col5) (type: boolean) + Select Operator + expressions: _col0 (type: decimal(10,0)) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE - Dynamic Partitioning Event Operator - Target Input: agg_01 - Partition key expr: dim_shops_id - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE - Target column: dim_shops_id - Target Vertex: Map 3 - Map 2 + Select Operator + expressions: _col0 (type: decimal(10,0)) + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map 3 Map Operator Tree: TableScan alias: dim_shops @@ -900,8 +890,8 @@ STAGE PLANS: Partition key expr: dim_shops_id Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE Target column: dim_shops_id - Target Vertex: Map 5 - Map 3 + Target Vertex: Map 1 + Map 4 Map Operator Tree: TableScan alias: agg_01 @@ -917,7 +907,7 @@ STAGE PLANS: 1 id (type: int) outputColumnNames: _col0, _col1, _col5 input vertices: - 1 Map 1 + 1 Map 5 Filter Operator predicate: (_col1 = _col5) (type: boolean) Select Operator @@ -935,36 +925,34 @@ STAGE PLANS: Map 5 Map Operator Tree: 
TableScan - alias: agg_01 - filterExpr: dim_shops_id is not null (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {amount} {dim_shops_id} - 1 {id} - keys: - 0 dim_shops_id (type: int) - 1 id (type: int) - outputColumnNames: _col0, _col1, _col5 - input vertices: - 1 Map 2 - Filter Operator - predicate: (_col1 = _col5) (type: boolean) - Select Operator - expressions: _col0 (type: decimal(10,0)) + alias: dim_shops + filterExpr: (id is not null and (label = 'bar')) (type: boolean) + Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (id is not null and (label = 'bar')) (type: boolean) + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: id (type: int) + sort order: + + Map-reduce partition columns: id (type: int) + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: id (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: decimal(10,0)) - outputColumnNames: _col0 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 4 - Vertex: Union 4 + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Dynamic Partitioning Event Operator + Target Input: agg_01 + Partition key expr: dim_shops_id + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Target column: dim_shops_id + Target Vertex: Map 4 + Union 2 + Vertex: Union 2 Stage: Stage-0 Fetch Operator @@ -992,9 +980,9 @@ POSTHOOK: Input: default@agg_01@dim_shops_id=2 POSTHOOK: Input: default@agg_01@dim_shops_id=3 POSTHOOK: Input: default@dim_shops #### A masked pattern was here #### -4 -5 -6 1 2 3 +4 +5 +6 diff --git a/ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out b/ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out index 6f61441..909a201 100644 --- a/ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out +++ b/ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out @@ -1609,18 +1609,14 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 + File Output Operator + compressed: false Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.over1k_part2_orc + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.over1k_part2_orc Execution mode: vectorized Stage: Stage-2 @@ -1691,25 +1687,21 @@ STAGE PLANS: keys: KEY._col0 (type: smallint), KEY._col1 (type: int), KEY._col2 (type: bigint), KEY._col3 (type: float), KEY._col4 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col4 (type: tinyint) - sort order: + - Map-reduce partition columns: _col4 (type: tinyint) - Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col4 (type: tinyint) + sort order: + + Map-reduce partition columns: _col4 (type: tinyint) + Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) Execution mode: vectorized Reducer 3 Reduce Operator Tree: Extract - Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git a/ql/src/test/results/clientpositive/tez/dynpart_sort_optimization.q.out b/ql/src/test/results/clientpositive/tez/dynpart_sort_optimization.q.out index a69b814..ab0cd53 100644 --- a/ql/src/test/results/clientpositive/tez/dynpart_sort_optimization.q.out +++ b/ql/src/test/results/clientpositive/tez/dynpart_sort_optimization.q.out @@ -1513,18 +1513,14 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 + File Output Operator + compressed: false Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.over1k_part2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: 
default.over1k_part2 Stage: Stage-2 Dependency Collection @@ -1593,24 +1589,20 @@ STAGE PLANS: keys: KEY._col0 (type: smallint), KEY._col1 (type: int), KEY._col2 (type: bigint), KEY._col3 (type: float), KEY._col4 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col4 (type: tinyint) - sort order: + - Map-reduce partition columns: _col4 (type: tinyint) - Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col4 (type: tinyint) + sort order: + + Map-reduce partition columns: _col4 (type: tinyint) + Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) Reducer 3 Reduce Operator Tree: Extract - Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git a/ql/src/test/results/clientpositive/tez/dynpart_sort_optimization2.q.out b/ql/src/test/results/clientpositive/tez/dynpart_sort_optimization2.q.out index 42e2cde..481ca10 100644 --- a/ql/src/test/results/clientpositive/tez/dynpart_sort_optimization2.q.out +++ b/ql/src/test/results/clientpositive/tez/dynpart_sort_optimization2.q.out @@ -93,20 +93,16 @@ STAGE PLANS: Filter Operator predicate: ((ss_sold_date_sk >= 2452617) and (ss_sold_date_sk <= 2452638)) (type: boolean) Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ss_sold_date_sk (type: int), ss_net_paid_inc_tax (type: float), ss_net_profit (type: float) - outputColumnNames: ss_sold_date_sk, ss_net_paid_inc_tax, ss_net_profit + Group By Operator + keys: ss_sold_date_sk (type: int), ss_net_paid_inc_tax (type: float), ss_net_profit (type: float) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: ss_sold_date_sk (type: int), ss_net_paid_inc_tax (type: float), ss_net_profit (type: float) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: float), _col2 (type: float) + sort order: +++ + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: float), 
_col2 (type: float) - sort order: +++ - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Group By Operator @@ -580,20 +576,16 @@ STAGE PLANS: Filter Operator predicate: ((ss_sold_date_sk >= 2452617) and (ss_sold_date_sk <= 2452638)) (type: boolean) Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ss_sold_date_sk (type: int), ss_net_paid_inc_tax (type: float), ss_net_profit (type: float) - outputColumnNames: ss_sold_date_sk, ss_net_paid_inc_tax, ss_net_profit + Group By Operator + keys: ss_sold_date_sk (type: int), ss_net_paid_inc_tax (type: float), ss_net_profit (type: float) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: ss_sold_date_sk (type: int), ss_net_paid_inc_tax (type: float), ss_net_profit (type: float) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: float), _col2 (type: float) + sort order: +++ + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: float), _col2 (type: float) - sort order: +++ - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Group By Operator @@ -1121,20 +1113,16 @@ STAGE PLANS: Filter Operator predicate: ((ss_sold_date_sk >= 2452617) and (ss_sold_date_sk <= 2452638)) (type: boolean) Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ss_sold_date_sk (type: int), ss_net_paid_inc_tax (type: float), ss_net_profit (type: float) - outputColumnNames: ss_sold_date_sk, ss_net_paid_inc_tax, ss_net_profit + Group By Operator + keys: ss_sold_date_sk (type: int), ss_net_paid_inc_tax (type: float), ss_net_profit (type: float) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: ss_sold_date_sk (type: int), ss_net_paid_inc_tax (type: float), ss_net_profit (type: float) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: float), _col2 (type: float) + sort order: +++ + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: float), _col2 (type: float) - sort order: +++ - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Reducer 2 Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/tez/filter_join_breaktask.q.out b/ql/src/test/results/clientpositive/tez/filter_join_breaktask.q.out index f85fdbc..33024b0 100644 --- a/ql/src/test/results/clientpositive/tez/filter_join_breaktask.q.out +++ b/ql/src/test/results/clientpositive/tez/filter_join_breaktask.q.out @@ -142,8 +142,8 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Map 4 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 
(SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -216,19 +216,20 @@ STAGE PLANS: Map 4 Map Operator Tree: TableScan - alias: g + alias: m Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (value <> '') (type: boolean) - Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE + predicate: ((key is not null and value is not null) and (value <> '')) (type: boolean) + Statistics: Num rows: 7 Data size: 59 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: value (type: string) + key expressions: key (type: int) sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 7 Data size: 59 Basic stats: COMPLETE Column stats: NONE tag: 1 + value expressions: value (type: string) auto parallelism: true Path -> Alias: #### A masked pattern was here #### @@ -279,24 +280,23 @@ STAGE PLANS: name: default.filter_join_breaktask name: default.filter_join_breaktask Truncated Path -> Alias: - /filter_join_breaktask/ds=2008-04-08 [g] + /filter_join_breaktask/ds=2008-04-08 [m] Map 5 Map Operator Tree: TableScan - alias: m + alias: g Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: ((key is not null and value is not null) and (value <> '')) (type: boolean) - Statistics: Num rows: 7 Data size: 59 Basic stats: COMPLETE Column stats: NONE + predicate: (value <> '') (type: boolean) + Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: key (type: int) + key expressions: value (type: string) sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 7 Data size: 59 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE tag: 1 - value expressions: value (type: string) auto parallelism: true Path -> Alias: #### A masked pattern was here #### @@ -347,7 +347,7 @@ STAGE PLANS: name: default.filter_join_breaktask name: default.filter_join_breaktask Truncated Path -> Alias: - /filter_join_breaktask/ds=2008-04-08 [m] + /filter_join_breaktask/ds=2008-04-08 [g] Reducer 2 Needs Tagging: false Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/tez/having.q.out b/ql/src/test/results/clientpositive/tez/having.q.out index 7eb6725..f0ffce3 100644 --- a/ql/src/test/results/clientpositive/tez/having.q.out +++ b/ql/src/test/results/clientpositive/tez/having.q.out @@ -106,22 +106,18 @@ STAGE PLANS: Filter Operator predicate: (key <> 302) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Group By Operator + aggregations: max(value) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: max(value) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key 
expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: Group By Operator @@ -759,22 +755,18 @@ STAGE PLANS: Filter Operator predicate: (key > 300) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Group By Operator + aggregations: max(value) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: max(value) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/tez/join0.q.out b/ql/src/test/results/clientpositive/tez/join0.q.out index 4835781..391368e 100644 --- a/ql/src/test/results/clientpositive/tez/join0.q.out +++ b/ql/src/test/results/clientpositive/tez/join0.q.out @@ -69,14 +69,10 @@ STAGE PLANS: 1 {VALUE._col0} {VALUE._col1} outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + sort order: ++++ Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - sort order: ++++ - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: Select Operator diff --git a/ql/src/test/results/clientpositive/tez/join1.q.out b/ql/src/test/results/clientpositive/tez/join1.q.out index cd61749..226bce5 100644 --- a/ql/src/test/results/clientpositive/tez/join1.q.out +++ b/ql/src/test/results/clientpositive/tez/join1.q.out @@ -34,7 +34,7 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -44,11 +44,10 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value 
(type: string) Map 3 Map Operator Tree: TableScan - alias: src1 + alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -58,6 +57,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Reducer 2 Reduce Operator Tree: Merge Join Operator diff --git a/ql/src/test/results/clientpositive/tez/join_nullsafe.q.out b/ql/src/test/results/clientpositive/tez/join_nullsafe.q.out index b71028a..1639324 100644 --- a/ql/src/test/results/clientpositive/tez/join_nullsafe.q.out +++ b/ql/src/test/results/clientpositive/tez/join_nullsafe.q.out @@ -38,25 +38,25 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: value (type: int) + key expressions: key (type: int) sort order: + - Map-reduce partition columns: value (type: int) + Map-reduce partition columns: key (type: int) Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: int) + value expressions: value (type: int) Map 3 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: key (type: int) + key expressions: value (type: int) sort order: + - Map-reduce partition columns: key (type: int) + Map-reduce partition columns: value (type: int) Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: int) + value expressions: key (type: int) Reducer 2 Reduce Operator Tree: Merge Join Operator @@ -123,35 +123,35 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: value is not null (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: value (type: int) + key expressions: key (type: int) sort order: + - Map-reduce partition columns: value (type: int) + Map-reduce partition columns: key (type: int) Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: int) + value expressions: value (type: int) Map 3 Map Operator Tree: TableScan - alias: c + alias: b Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) + predicate: value is not null (type: boolean) Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: key (type: int) + key expressions: value (type: int) sort order: + - Map-reduce partition columns: key (type: int) + Map-reduce partition columns: value (type: int) Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: int) + value expressions: key (type: int) Map 4 Map Operator Tree: TableScan - alias: a + alias: c Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -220,29 +220,29 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: b + alias: 
a Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: value (type: int) + key expressions: key (type: int) sort order: + - Map-reduce partition columns: value (type: int) + Map-reduce partition columns: key (type: int) Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: int) + value expressions: value (type: int) Map 3 Map Operator Tree: TableScan - alias: c + alias: b Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: key (type: int) + key expressions: value (type: int) sort order: + - Map-reduce partition columns: key (type: int) + Map-reduce partition columns: value (type: int) Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: int) + value expressions: key (type: int) Map 4 Map Operator Tree: TableScan - alias: a + alias: c Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) @@ -336,33 +336,33 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) + predicate: value is not null (type: boolean) Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: value (type: int), key (type: int) + key expressions: key (type: int), value (type: int) sort order: ++ - Map-reduce partition columns: value (type: int), key (type: int) + Map-reduce partition columns: key (type: int), value (type: int) Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE Map 3 Map Operator Tree: TableScan - alias: c + alias: b Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: value is not null (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: key (type: int), value (type: int) + key expressions: value (type: int), key (type: int) sort order: ++ - Map-reduce partition columns: key (type: int), value (type: int) + Map-reduce partition columns: value (type: int), key (type: int) Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE Map 4 Map Operator Tree: TableScan - alias: a + alias: c Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: value is not null (type: boolean) @@ -431,27 +431,27 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: value (type: int), key (type: int) + key expressions: key (type: int), value (type: int) sort order: ++ - Map-reduce partition columns: value (type: int), key (type: int) + Map-reduce partition columns: key (type: int), value (type: int) Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE Map 3 Map Operator Tree: TableScan - alias: c + alias: b Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: key (type: int), value (type: int) + key expressions: value (type: int), key (type: int) sort order: ++ - Map-reduce 
partition columns: key (type: int), value (type: int) + Map-reduce partition columns: value (type: int), key (type: int) Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE Map 4 Map Operator Tree: TableScan - alias: a + alias: c Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int), value (type: int) @@ -1570,29 +1570,29 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: value is null (type: boolean) + predicate: key is null (type: boolean) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: null (type: void) sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: int) + value expressions: value (type: int) Map 3 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is null (type: boolean) + predicate: value is null (type: boolean) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: null (type: void) sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: int) + value expressions: key (type: int) Reducer 2 Reduce Operator Tree: Merge Join Operator diff --git a/ql/src/test/results/clientpositive/tez/limit_pushdown.q.out b/ql/src/test/results/clientpositive/tez/limit_pushdown.q.out index d5d6e68..df18610 100644 --- a/ql/src/test/results/clientpositive/tez/limit_pushdown.q.out +++ b/ql/src/test/results/clientpositive/tez/limit_pushdown.q.out @@ -727,23 +727,19 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: double) - sort order: + - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.3 - value expressions: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: double) + sort order: + + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.3 + value expressions: _col0 (type: string) Reducer 3 Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: double) outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 20 Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE @@ -871,22 +867,18 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - 
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 2 - Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col0 is not null (type: boolean) + Limit + Number of rows: 2 + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reducer 3 Reduce Operator Tree: Merge Join Operator @@ -919,18 +911,14 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 3 + Limit + Number of rows: 3 + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.3 - value expressions: _col0 (type: string), _col1 (type: bigint) + TopN Hash Memory Usage: 0.3 + value expressions: _col0 (type: string), _col1 (type: bigint) Reducer 6 Reduce Operator Tree: Select Operator diff --git a/ql/src/test/results/clientpositive/tez/lvj_mapjoin.q.out b/ql/src/test/results/clientpositive/tez/lvj_mapjoin.q.out new file mode 100644 index 0000000..d41b090 --- /dev/null +++ b/ql/src/test/results/clientpositive/tez/lvj_mapjoin.q.out @@ -0,0 +1,298 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +drop table sour1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: -- SORT_QUERY_RESULTS + +drop table sour1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table sour2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table sour2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table expod1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table expod1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table expod2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table expod2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table sour1(id int, av1 string, av2 string, av3 string) row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@sour1 +POSTHOOK: query: create table sour1(id int, av1 string, av2 string, av3 string) row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@sour1 +PREHOOK: query: create table sour2(id int, bv1 string, bv2 string, bv3 string) row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@sour2 +POSTHOOK: query: create 
table sour2(id int, bv1 string, bv2 string, bv3 string) row format delimited fields terminated by ','
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@sour2
+PREHOOK: query: load data local inpath '../../data/files/sour1.txt' into table sour1
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@sour1
+POSTHOOK: query: load data local inpath '../../data/files/sour1.txt' into table sour1
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@sour1
+PREHOOK: query: load data local inpath '../../data/files//sour2.txt' into table sour2
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@sour2
+POSTHOOK: query: load data local inpath '../../data/files//sour2.txt' into table sour2
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@sour2
+PREHOOK: query: create table expod1(aid int, av array<string>)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@expod1
+POSTHOOK: query: create table expod1(aid int, av array<string>)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@expod1
+PREHOOK: query: create table expod2(bid int, bv array<string>)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@expod2
+POSTHOOK: query: create table expod2(bid int, bv array<string>)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@expod2
+PREHOOK: query: insert overwrite table expod1 select id, array(av1,av2,av3) from sour1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@sour1
+PREHOOK: Output: default@expod1
+POSTHOOK: query: insert overwrite table expod1 select id, array(av1,av2,av3) from sour1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@sour1
+POSTHOOK: Output: default@expod1
+POSTHOOK: Lineage: expod1.aid SIMPLE [(sour1)sour1.FieldSchema(name:id, type:int, comment:null), ]
+POSTHOOK: Lineage: expod1.av EXPRESSION [(sour1)sour1.FieldSchema(name:av1, type:string, comment:null), (sour1)sour1.FieldSchema(name:av2, type:string, comment:null), (sour1)sour1.FieldSchema(name:av3, type:string, comment:null), ]
+PREHOOK: query: insert overwrite table expod2 select id, array(bv1,bv2,bv3) from sour2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@sour2
+PREHOOK: Output: default@expod2
+POSTHOOK: query: insert overwrite table expod2 select id, array(bv1,bv2,bv3) from sour2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@sour2
+POSTHOOK: Output: default@expod2
+POSTHOOK: Lineage: expod2.bid SIMPLE [(sour2)sour2.FieldSchema(name:id, type:int, comment:null), ]
+POSTHOOK: Lineage: expod2.bv EXPRESSION [(sour2)sour2.FieldSchema(name:bv1, type:string, comment:null), (sour2)sour2.FieldSchema(name:bv2, type:string, comment:null), (sour2)sour2.FieldSchema(name:bv3, type:string, comment:null), ]
+PREHOOK: query: explain with sub1 as
+(select aid, avalue from expod1 lateral view explode(av) avs as avalue ),
+sub2 as
+(select bid, bvalue from expod2 lateral view explode(bv) bvs as bvalue)
+select sub1.aid, sub1.avalue, sub2.bvalue
+from sub1,sub2
+where sub1.aid=sub2.bid
+PREHOOK: type: QUERY
+POSTHOOK: query: explain with sub1 as
+(select aid, avalue from expod1 lateral view explode(av) avs as avalue ),
+sub2 as
+(select bid, bvalue from expod2 lateral view explode(bv) bvs as bvalue)
+select sub1.aid, sub1.avalue, sub2.bvalue
+from sub1,sub2
+where sub1.aid=sub2.bid
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  
Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Edges:
+        Map 2 <- Map 1 (BROADCAST_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: expod2
+                  Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: bid is not null (type: boolean)
+                    Statistics: Num rows: 2 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+                    Lateral View Forward
+                      Statistics: Num rows: 2 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+                      Select Operator
+                        expressions: bid (type: int)
+                        outputColumnNames: bid
+                        Statistics: Num rows: 2 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+                        Lateral View Join Operator
+                          outputColumnNames: _col0, _col5
+                          Statistics: Num rows: 4 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+                          Select Operator
+                            expressions: _col0 (type: int), _col5 (type: string)
+                            outputColumnNames: _col0, _col1
+                            Statistics: Num rows: 4 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+                            Reduce Output Operator
+                              key expressions: _col0 (type: int)
+                              sort order: +
+                              Map-reduce partition columns: _col0 (type: int)
+                              Statistics: Num rows: 4 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+                              value expressions: _col1 (type: string)
+                      Select Operator
+                        expressions: bv (type: array<string>)
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 2 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+                        UDTF Operator
+                          Statistics: Num rows: 2 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+                          function name: explode
+                          Lateral View Join Operator
+                            outputColumnNames: _col0, _col5
+                            Statistics: Num rows: 4 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+                            Select Operator
+                              expressions: _col0 (type: int), _col5 (type: string)
+                              outputColumnNames: _col0, _col1
+                              Statistics: Num rows: 4 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+                              Reduce Output Operator
+                                key expressions: _col0 (type: int)
+                                sort order: +
+                                Map-reduce partition columns: _col0 (type: int)
+                                Statistics: Num rows: 4 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+                                value expressions: _col1 (type: string)
+        Map 2 
+            Map Operator Tree:
+                TableScan
+                  alias: expod1
+                  Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: aid is not null (type: boolean)
+                    Statistics: Num rows: 2 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+                    Lateral View Forward
+                      Statistics: Num rows: 2 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+                      Select Operator
+                        expressions: aid (type: int)
+                        outputColumnNames: aid
+                        Statistics: Num rows: 2 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+                        Lateral View Join Operator
+                          outputColumnNames: _col0, _col5
+                          Statistics: Num rows: 4 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+                          Select Operator
+                            expressions: _col0 (type: int), _col5 (type: string)
+                            outputColumnNames: _col0, _col1
+                            Statistics: Num rows: 4 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+                            Map Join Operator
+                              condition map:
+                                   Inner Join 0 to 1
+                              condition expressions:
+                                0 {_col0} {_col1}
+                                1 {_col0} {_col1}
+                              keys:
+                                0 _col0 (type: int)
+                                1 _col0 (type: int)
+                              outputColumnNames: _col0, _col1, _col2, _col3
+                              input vertices:
+                                1 Map 1
+                              Statistics: Num rows: 4 Data size: 57 Basic stats: COMPLETE Column stats: NONE
+                              Filter Operator
+                                predicate: (_col0 = _col2) (type: boolean)
+                                Statistics: Num rows: 2 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+                                Select Operator
+                                  expressions: _col0 (type: int), 
_col1 (type: string), _col3 (type: string)
+                                  outputColumnNames: _col0, _col1, _col2
+                                  Statistics: Num rows: 2 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+                                  File Output Operator
+                                    compressed: false
+                                    Statistics: Num rows: 2 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+                                    table:
+                                        input format: org.apache.hadoop.mapred.TextInputFormat
+                                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      Select Operator
+                        expressions: av (type: array<string>)
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 2 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+                        UDTF Operator
+                          Statistics: Num rows: 2 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+                          function name: explode
+                          Lateral View Join Operator
+                            outputColumnNames: _col0, _col5
+                            Statistics: Num rows: 4 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+                            Select Operator
+                              expressions: _col0 (type: int), _col5 (type: string)
+                              outputColumnNames: _col0, _col1
+                              Statistics: Num rows: 4 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+                              Map Join Operator
+                                condition map:
+                                     Inner Join 0 to 1
+                                condition expressions:
+                                  0 {_col0} {_col1}
+                                  1 {_col0} {_col1}
+                                keys:
+                                  0 _col0 (type: int)
+                                  1 _col0 (type: int)
+                                outputColumnNames: _col0, _col1, _col2, _col3
+                                input vertices:
+                                  1 Map 1
+                                Statistics: Num rows: 4 Data size: 57 Basic stats: COMPLETE Column stats: NONE
+                                Filter Operator
+                                  predicate: (_col0 = _col2) (type: boolean)
+                                  Statistics: Num rows: 2 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+                                  Select Operator
+                                    expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string)
+                                    outputColumnNames: _col0, _col1, _col2
+                                    Statistics: Num rows: 2 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+                                    File Output Operator
+                                      compressed: false
+                                      Statistics: Num rows: 2 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+                                      table:
+                                          input format: org.apache.hadoop.mapred.TextInputFormat
+                                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: with sub1 as
+(select aid, avalue from expod1 lateral view explode(av) avs as avalue ),
+sub2 as
+(select bid, bvalue from expod2 lateral view explode(bv) bvs as bvalue)
+select sub1.aid, sub1.avalue, sub2.bvalue
+from sub1,sub2
+where sub1.aid=sub2.bid
+PREHOOK: type: QUERY
+PREHOOK: Input: default@expod1
+PREHOOK: Input: default@expod2
+#### A masked pattern was here ####
+POSTHOOK: query: with sub1 as
+(select aid, avalue from expod1 lateral view explode(av) avs as avalue ),
+sub2 as
+(select bid, bvalue from expod2 lateral view explode(bv) bvs as bvalue)
+select sub1.aid, sub1.avalue, sub2.bvalue
+from sub1,sub2
+where sub1.aid=sub2.bid
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@expod1
+POSTHOOK: Input: default@expod2
+#### A masked pattern was here ####
+1	a1	b1
+1	a1	b11
+1	a1	b111
+1	a11	b1
+1	a11	b11
+1	a11	b111
+1	a111	b1
+1	a111	b11
+1	a111	b111
+2	a2	b2
+2	a2	b22
+2	a2	b222
+2	a22	b2
+2	a22	b22
+2	a22	b222
+2	a222	b2
+2	a222	b22
+2	a222	b222
diff --git a/ql/src/test/results/clientpositive/tez/mapjoin_decimal.q.out b/ql/src/test/results/clientpositive/tez/mapjoin_decimal.q.out
index bbf3c75..517f986 100644
--- a/ql/src/test/results/clientpositive/tez/mapjoin_decimal.q.out
+++ b/ql/src/test/results/clientpositive/tez/mapjoin_decimal.q.out
@@ -90,25 +90,12 @@ STAGE PLANS:
   Stage: Stage-1
     Tez
       Edges:
-        
Map 2 <- Map 1 (BROADCAST_EDGE) + Map 1 <- Map 2 (BROADCAST_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: t2 - Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: dec is not null (type: boolean) - Statistics: Num rows: 525 Data size: 58800 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: dec (type: decimal(4,0)) - sort order: + - Map-reduce partition columns: dec (type: decimal(4,0)) - Statistics: Num rows: 525 Data size: 58800 Basic stats: COMPLETE Column stats: NONE - Map 2 - Map Operator Tree: - TableScan alias: t1 Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -125,7 +112,7 @@ STAGE PLANS: 1 dec (type: decimal(4,0)) outputColumnNames: _col0, _col4 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 577 Data size: 64680 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: decimal(4,2)), _col4 (type: decimal(4,0)) @@ -138,6 +125,19 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map 2 + Map Operator Tree: + TableScan + alias: t2 + Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: dec is not null (type: boolean) + Statistics: Num rows: 525 Data size: 58800 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: dec (type: decimal(4,0)) + sort order: + + Map-reduce partition columns: dec (type: decimal(4,0)) + Statistics: Num rows: 525 Data size: 58800 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/tez/mapjoin_mapjoin.q.out b/ql/src/test/results/clientpositive/tez/mapjoin_mapjoin.q.out index c8aba93..efccbc6 100644 --- a/ql/src/test/results/clientpositive/tez/mapjoin_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/tez/mapjoin_mapjoin.q.out @@ -82,13 +82,13 @@ STAGE PLANS: condition expressions: 0 {key} 1 - Estimated key counts: Map 3 => 250 + Estimated key counts: Map 2 => 250 keys: 0 value (type: string) 1 value (type: string) outputColumnNames: _col0 input vertices: - 1 Map 3 + 1 Map 2 Position of Big Table: 0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -97,13 +97,13 @@ STAGE PLANS: condition expressions: 0 {_col0} 1 - Estimated key counts: Map 2 => 13 + Estimated key counts: Map 3 => 13 keys: 0 _col0 (type: string) 1 key (type: string) outputColumnNames: _col0 input vertices: - 1 Map 2 + 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -326,18 +326,18 @@ STAGE PLANS: Map 2 Map Operator Tree: TableScan - alias: src1 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + predicate: value is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: key (type: string) + key 
expressions: value (type: string) sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE tag: 1 auto parallelism: true Path -> Alias: @@ -345,7 +345,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: src1 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -355,14 +355,14 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.src numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -375,35 +375,35 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.src numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src1 - name: default.src1 + name: default.src + name: default.src Truncated Path -> Alias: - /src1 [src1] + /src [src] Map 3 Map Operator Tree: TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: src1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: value is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + predicate: key is not null (type: boolean) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: value (type: string) + key expressions: key (type: string) sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE tag: 1 auto parallelism: true Path -> Alias: @@ -411,7 +411,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: src1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -421,14 +421,14 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src + name default.src1 numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string 
key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -441,20 +441,20 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src + name default.src1 numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.src1 + name: default.src1 Truncated Path -> Alias: - /src [src] + /src1 [src1] Stage: Stage-0 Fetch Operator @@ -498,7 +498,7 @@ STAGE PLANS: 1 value (type: string) outputColumnNames: _col0 input vertices: - 1 Map 3 + 1 Map 2 Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -511,7 +511,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0 input vertices: - 1 Map 2 + 1 Map 3 Statistics: Num rows: 200 Data size: 2132 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string) @@ -527,19 +527,6 @@ STAGE PLANS: Map 2 Map Operator Tree: TableScan - alias: src1 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Map 3 - Map Operator Tree: - TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -550,6 +537,19 @@ STAGE PLANS: sort order: + Map-reduce partition columns: value (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Map 3 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -594,7 +594,7 @@ STAGE PLANS: 1 value (type: string) outputColumnNames: _col0, _col2 input vertices: - 1 Map 4 + 1 Map 3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -607,49 +607,45 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col2 input vertices: - 1 Map 3 + 1 Map 4 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string) - outputColumnNames: _col2 + Group By Operator + aggregations: count() + keys: _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE 
Column stats: NONE
-                        Group By Operator
-                          aggregations: count()
-                          keys: _col2 (type: string)
-                          mode: hash
-                          outputColumnNames: _col0, _col1
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
                         Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
-                          Reduce Output Operator
-                            key expressions: _col0 (type: string)
-                            sort order: +
-                            Map-reduce partition columns: _col0 (type: string)
-                            Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
-                            value expressions: _col1 (type: bigint)
+                        value expressions: _col1 (type: bigint)
         Map 3
             Map Operator Tree:
                 TableScan
-                  alias: src1
+                  alias: src
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: key is not null (type: boolean)
+                    predicate: value is not null (type: boolean)
                     Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
-                      key expressions: key (type: string)
+                      key expressions: value (type: string)
                       sort order: +
-                      Map-reduce partition columns: key (type: string)
+                      Map-reduce partition columns: value (type: string)
                       Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
         Map 4
             Map Operator Tree:
                 TableScan
-                  alias: src
+                  alias: src1
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: value is not null (type: boolean)
+                    predicate: key is not null (type: boolean)
                     Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
-                      key expressions: value (type: string)
+                      key expressions: key (type: string)
                       sort order: +
-                      Map-reduce partition columns: value (type: string)
+                      Map-reduce partition columns: key (type: string)
                       Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
         Reducer 2
             Reduce Operator Tree:
diff --git a/ql/src/test/results/clientpositive/tez/metadataonly1.q.out b/ql/src/test/results/clientpositive/tez/metadataonly1.q.out
index cd038c8..f7774fe 100644
--- a/ql/src/test/results/clientpositive/tez/metadataonly1.q.out
+++ b/ql/src/test/results/clientpositive/tez/metadataonly1.q.out
@@ -841,32 +841,28 @@ STAGE PLANS:
                     isSamplingPred: false
                     predicate: _col0 is not null (type: boolean)
                     Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: string)
+                      Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+                      tag: 1
+                      auto parallelism: true
                     Select Operator
                       expressions: _col0 (type: string)
                       outputColumnNames: _col0
                       Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: string)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
-                        tag: 1
-                        auto parallelism: true
-                      Select Operator
-                        expressions: _col0 (type: string)
+                      Group By Operator
+                        keys: _col0 (type: string)
+                        mode: hash
                         outputColumnNames: _col0
                         Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
-                        Group By Operator
-                          keys: _col0 (type: string)
-                          mode: hash
-                          outputColumnNames: _col0
+                        Dynamic Partitioning Event Operator
+                          Target Input: a2
+                          Partition key expr: ds
                           Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
-                          Dynamic Partitioning Event Operator
-                            Target Input: a2
-                            Partition key expr: ds
-                            Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
-                            Target column: ds
-                            Target Vertex: Map 5
+                          Target column: ds
+                          Target Vertex: Map 5
         Reducer 3
             Needs Tagging: false
            Reduce Operator Tree:
@@ -878,19 +874,17 @@ STAGE PLANS:
                  1 
                Position of Big Table: 0
                Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  aggregations: count()
-                  mode: hash
-                  outputColumnNames: _col0
+              Group By Operator
+                aggregations: count()
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order:
                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    sort order:
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                    tag: -1
-                    value expressions: _col0 (type: bigint)
-                    auto parallelism: false
+                  tag: -1
+                  value expressions: _col0 (type: bigint)
+                  auto parallelism: false
        Reducer 4
            Needs Tagging: false
            Reduce Operator Tree:
diff --git a/ql/src/test/results/clientpositive/tez/mrr.q.out b/ql/src/test/results/clientpositive/tez/mrr.q.out
index 5718c32..41c7760 100644
--- a/ql/src/test/results/clientpositive/tez/mrr.q.out
+++ b/ql/src/test/results/clientpositive/tez/mrr.q.out
@@ -44,25 +44,21 @@ STAGE PLANS:
                 keys: KEY._col0 (type: string)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: string), _col1 (type: bigint)
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col1 (type: bigint)
-                    sort order: +
-                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col0 (type: string)
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col1 (type: bigint)
+                  sort order: +
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: string)
         Reducer 3
             Reduce Operator Tree:
               Select Operator
                 expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: bigint)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.TextInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -413,7 +409,7 @@ STAGE PLANS:
         Map 1
             Map Operator Tree:
                 TableScan
-                  alias: s2
+                  alias: s1
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: key is not null (type: boolean)
@@ -423,11 +419,10 @@ STAGE PLANS:
                       sort order: +
                       Map-reduce partition columns: key (type: string)
                       Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: value (type: string)
         Map 5
             Map Operator Tree:
TableScan - alias: s1 + alias: s2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -437,6 +432,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Reducer 2 Reduce Operator Tree: Merge Join Operator @@ -447,21 +443,17 @@ STAGE PLANS: 1 {KEY.reducesinkkey0} {VALUE._col0} outputColumnNames: _col5, _col6 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col5 (type: string), _col6 (type: string) - outputColumnNames: _col5, _col6 + Group By Operator + aggregations: count(DISTINCT _col6) + keys: _col5 (type: string), _col6 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(DISTINCT _col6) - keys: _col5 (type: string), _col6 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: Group By Operator @@ -469,25 +461,21 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: bigint) - sort order: + - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: bigint) + sort order: + + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Reducer 4 Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -830,28 +818,14 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Map 1 <- Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern 
was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: s2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 2 - Map Operator Tree: - TableScan alias: s1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -868,49 +842,55 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col5, _col6 input vertices: - 1 Map 1 + 1 Map 4 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col5 (type: string), _col6 (type: string) - outputColumnNames: _col5, _col6 + Group By Operator + aggregations: count(DISTINCT _col6) + keys: _col5 (type: string), _col6 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(DISTINCT _col6) - keys: _col5 (type: string), _col6 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reducer 3 + Map 4 + Map Operator Tree: + TableScan + alias: s2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: bigint) - sort order: + - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Reducer 4 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: bigint) + sort order: + + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reducer 3 Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), 
KEY.reducesinkkey0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1299,22 +1279,18 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Group By Operator + aggregations: count(value) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(value) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Map 6 Map Operator Tree: TableScan @@ -1323,22 +1299,18 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Group By Operator + aggregations: count(value) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(value) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Map 9 Map Operator Tree: TableScan @@ -1347,22 +1319,18 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Group By Operator + aggregations: count(value) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(value) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output 
Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reducer 10 Reduce Operator Tree: Group By Operator @@ -1370,30 +1338,23 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col1 > 1) (type: boolean) - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: bigint) - sort order: + - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: bigint) + sort order: + + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Reducer 11 Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reducer 2 Reduce Operator Tree: @@ -1403,26 +1364,25 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col1 > 1) (type: boolean) + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: bigint) sort order: + - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Reducer 3 Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 125 
Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reducer 4 Reduce Operator Tree: @@ -1435,25 +1395,21 @@ STAGE PLANS: 1 {KEY.reducesinkkey0} {VALUE._col0} 2 {KEY.reducesinkkey0} {VALUE._col0} outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint), _col4 (type: string), _col5 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint), _col4 (type: string), _col5 (type: bigint) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint), _col4 (type: string), _col5 (type: bigint) Reducer 5 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: string), VALUE._col2 (type: bigint), VALUE._col3 (type: string), VALUE._col4 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1465,27 +1421,23 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: bigint) - sort order: + - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: bigint) + sort order: + + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Reducer 8 Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) 
sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Stage: Stage-0 @@ -1686,8 +1638,7 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 1 <- Reducer 3 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (BROADCAST_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1698,31 +1649,19 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} - 1 {key} {value} - keys: - 0 _col0 (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - input vertices: - 0 Reducer 3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Map 2 + Group By Operator + aggregations: count(value) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Map 3 Map Operator Tree: TableScan alias: src @@ -1730,40 +1669,44 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(value) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Reducer 3 + value expressions: value (type: string) + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - 
Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {key} {value} + keys: + 0 _col0 (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1785,503 +1728,503 @@ FROM POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -238 2 238 val_238 -86 1 86 val_86 -311 3 311 val_311 -27 1 27 val_27 -165 2 165 val_165 -409 3 409 val_409 -255 2 255 val_255 -278 2 278 val_278 -98 2 98 val_98 -484 1 484 val_484 -265 2 265 val_265 -193 3 193 val_193 -401 5 401 val_401 -150 1 150 val_150 -273 3 273 val_273 -224 2 224 val_224 -369 3 369 val_369 -66 1 66 val_66 -128 3 128 val_128 -213 2 213 val_213 -146 2 146 val_146 -406 4 406 val_406 -429 2 429 val_429 -374 1 374 val_374 -152 2 152 val_152 -469 5 469 val_469 -145 1 145 val_145 -495 1 495 val_495 -37 2 37 val_37 -327 3 327 val_327 -281 2 281 val_281 -277 4 277 val_277 -209 2 209 val_209 -15 2 15 val_15 -82 1 82 val_82 -403 3 403 val_403 -166 1 166 val_166 -417 3 417 val_417 -430 3 430 val_430 -252 1 252 val_252 -292 1 292 val_292 -219 2 219 val_219 -287 1 287 val_287 -153 1 153 val_153 -193 3 193 val_193 -338 1 338 val_338 -446 1 446 val_446 -459 2 459 val_459 -394 1 394 val_394 -237 2 237 val_237 -482 1 482 val_482 -174 2 174 val_174 -413 2 413 val_413 -494 1 494 val_494 -207 2 207 val_207 -199 3 199 val_199 -466 3 466 val_466 -208 3 208 val_208 -174 2 174 val_174 -399 2 399 val_399 -396 3 396 val_396 -247 1 247 val_247 -417 3 417 val_417 -489 4 489 val_489 -162 1 162 val_162 -377 1 377 val_377 -397 2 397 val_397 -309 2 309 val_309 -365 1 365 val_365 -266 1 266 val_266 -439 2 439 val_439 -342 2 342 val_342 -367 2 367 val_367 -325 2 325 val_325 -167 3 167 val_167 -195 2 195 val_195 -475 1 475 val_475 -17 1 17 val_17 -113 2 113 val_113 -155 1 155 val_155 -203 2 203 val_203 -339 1 339 val_339 0 3 0 val_0 -455 1 455 val_455 +0 3 0 val_0 +0 3 0 val_0 +10 1 10 val_10 +100 2 100 val_100 +100 2 100 val_100 +103 2 103 val_103 +103 2 103 val_103 +104 2 104 val_104 +104 2 104 val_104 +105 1 105 val_105 +11 1 11 val_11 +111 1 111 val_111 +113 2 113 val_113 +113 2 113 val_113 +114 1 114 val_114 +116 1 116 val_116 +118 2 118 val_118 +118 2 118 val_118 +119 3 119 val_119 +119 3 119 val_119 +119 3 119 val_119 +12 2 12 val_12 +12 2 12 val_12 +120 2 120 val_120 +120 2 120 val_120 +125 2 125 val_125 +125 2 125 val_125 
+126 1 126 val_126 +128 3 128 val_128 +128 3 128 val_128 128 3 128 val_128 -311 3 311 val_311 -316 3 316 val_316 -57 1 57 val_57 -302 1 302 val_302 -205 2 205 val_205 -149 2 149 val_149 -438 3 438 val_438 -345 1 345 val_345 129 2 129 val_129 -170 1 170 val_170 -20 1 20 val_20 -489 4 489 val_489 +129 2 129 val_129 +131 1 131 val_131 +133 1 133 val_133 +134 2 134 val_134 +134 2 134 val_134 +136 1 136 val_136 +137 2 137 val_137 +137 2 137 val_137 +138 4 138 val_138 +138 4 138 val_138 +138 4 138 val_138 +138 4 138 val_138 +143 1 143 val_143 +145 1 145 val_145 +146 2 146 val_146 +146 2 146 val_146 +149 2 149 val_149 +149 2 149 val_149 +15 2 15 val_15 +15 2 15 val_15 +150 1 150 val_150 +152 2 152 val_152 +152 2 152 val_152 +153 1 153 val_153 +155 1 155 val_155 +156 1 156 val_156 157 1 157 val_157 -378 1 378 val_378 -221 2 221 val_221 -92 1 92 val_92 -111 1 111 val_111 -47 1 47 val_47 -72 2 72 val_72 -4 1 4 val_4 -280 2 280 val_280 -35 3 35 val_35 -427 1 427 val_427 -277 4 277 val_277 -208 3 208 val_208 -356 1 356 val_356 -399 2 399 val_399 +158 1 158 val_158 +160 1 160 val_160 +162 1 162 val_162 +163 1 163 val_163 +164 2 164 val_164 +164 2 164 val_164 +165 2 165 val_165 +165 2 165 val_165 +166 1 166 val_166 +167 3 167 val_167 +167 3 167 val_167 +167 3 167 val_167 +168 1 168 val_168 169 4 169 val_169 -382 2 382 val_382 -498 3 498 val_498 -125 2 125 val_125 -386 1 386 val_386 -437 1 437 val_437 -469 5 469 val_469 -192 1 192 val_192 -286 1 286 val_286 -187 3 187 val_187 +169 4 169 val_169 +169 4 169 val_169 +169 4 169 val_169 +17 1 17 val_17 +170 1 170 val_170 +172 2 172 val_172 +172 2 172 val_172 +174 2 174 val_174 +174 2 174 val_174 +175 2 175 val_175 +175 2 175 val_175 176 2 176 val_176 -54 1 54 val_54 -459 2 459 val_459 -51 2 51 val_51 -138 4 138 val_138 -103 2 103 val_103 -239 2 239 val_239 -213 2 213 val_213 -216 2 216 val_216 -430 3 430 val_430 -278 2 278 val_278 176 2 176 val_176 -289 1 289 val_289 -221 2 221 val_221 -65 1 65 val_65 -318 3 318 val_318 -332 1 332 val_332 -311 3 311 val_311 -275 1 275 val_275 -137 2 137 val_137 -241 1 241 val_241 -83 2 83 val_83 -333 2 333 val_333 +177 1 177 val_177 +178 1 178 val_178 +179 2 179 val_179 +179 2 179 val_179 +18 2 18 val_18 +18 2 18 val_18 180 1 180 val_180 -284 1 284 val_284 -12 2 12 val_12 -230 5 230 val_230 181 1 181 val_181 -67 2 67 val_67 -260 1 260 val_260 -404 2 404 val_404 -384 3 384 val_384 -489 4 489 val_489 -353 2 353 val_353 -373 1 373 val_373 -272 2 272 val_272 -138 4 138 val_138 -217 2 217 val_217 -84 2 84 val_84 -348 5 348 val_348 -466 3 466 val_466 -58 2 58 val_58 -8 1 8 val_8 -411 1 411 val_411 -230 5 230 val_230 -208 3 208 val_208 -348 5 348 val_348 -24 2 24 val_24 -463 2 463 val_463 -431 3 431 val_431 -179 2 179 val_179 -172 2 172 val_172 -42 2 42 val_42 -129 2 129 val_129 -158 1 158 val_158 -119 3 119 val_119 -496 1 496 val_496 -0 3 0 val_0 -322 2 322 val_322 -197 2 197 val_197 -468 4 468 val_468 -393 1 393 val_393 -454 3 454 val_454 -100 2 100 val_100 -298 3 298 val_298 -199 3 199 val_199 -191 2 191 val_191 -418 1 418 val_418 -96 1 96 val_96 -26 2 26 val_26 -165 2 165 val_165 -327 3 327 val_327 -230 5 230 val_230 -205 2 205 val_205 -120 2 120 val_120 -131 1 131 val_131 -51 2 51 val_51 -404 2 404 val_404 -43 1 43 val_43 -436 1 436 val_436 -156 1 156 val_156 -469 5 469 val_469 -468 4 468 val_468 -308 1 308 val_308 -95 2 95 val_95 -196 1 196 val_196 -288 2 288 val_288 -481 1 481 val_481 -457 1 457 val_457 -98 2 98 val_98 -282 2 282 val_282 -197 2 197 val_197 +183 1 183 val_183 +186 1 186 val_186 187 3 187 val_187 -318 3 318 
val_318 -318 3 318 val_318 -409 3 409 val_409 -470 1 470 val_470 -137 2 137 val_137 -369 3 369 val_369 -316 3 316 val_316 -169 4 169 val_169 -413 2 413 val_413 -85 1 85 val_85 -77 1 77 val_77 -0 3 0 val_0 -490 1 490 val_490 -87 1 87 val_87 -364 1 364 val_364 -179 2 179 val_179 -118 2 118 val_118 -134 2 134 val_134 -395 2 395 val_395 -282 2 282 val_282 -138 4 138 val_138 -238 2 238 val_238 -419 1 419 val_419 -15 2 15 val_15 -118 2 118 val_118 -72 2 72 val_72 -90 3 90 val_90 -307 2 307 val_307 +187 3 187 val_187 +187 3 187 val_187 +189 1 189 val_189 19 1 19 val_19 -435 1 435 val_435 -10 1 10 val_10 -277 4 277 val_277 -273 3 273 val_273 -306 1 306 val_306 -224 2 224 val_224 -309 2 309 val_309 -389 1 389 val_389 -327 3 327 val_327 -242 2 242 val_242 -369 3 369 val_369 -392 1 392 val_392 -272 2 272 val_272 -331 2 331 val_331 -401 5 401 val_401 -242 2 242 val_242 -452 1 452 val_452 -177 1 177 val_177 -226 1 226 val_226 -5 3 5 val_5 -497 1 497 val_497 -402 1 402 val_402 -396 3 396 val_396 -317 2 317 val_317 -395 2 395 val_395 -58 2 58 val_58 -35 3 35 val_35 -336 1 336 val_336 -95 2 95 val_95 -11 1 11 val_11 -168 1 168 val_168 -34 1 34 val_34 -229 2 229 val_229 -233 2 233 val_233 -143 1 143 val_143 -472 1 472 val_472 -322 2 322 val_322 -498 3 498 val_498 -160 1 160 val_160 +190 1 190 val_190 +191 2 191 val_191 +191 2 191 val_191 +192 1 192 val_192 +193 3 193 val_193 +193 3 193 val_193 +193 3 193 val_193 +194 1 194 val_194 195 2 195 val_195 -42 2 42 val_42 -321 2 321 val_321 -430 3 430 val_430 -119 3 119 val_119 -489 4 489 val_489 -458 2 458 val_458 -78 1 78 val_78 -76 2 76 val_76 -41 1 41 val_41 -223 2 223 val_223 -492 2 492 val_492 -149 2 149 val_149 -449 1 449 val_449 -218 1 218 val_218 -228 1 228 val_228 -138 4 138 val_138 -453 1 453 val_453 -30 1 30 val_30 +195 2 195 val_195 +196 1 196 val_196 +197 2 197 val_197 +197 2 197 val_197 +199 3 199 val_199 +199 3 199 val_199 +199 3 199 val_199 +2 1 2 val_2 +20 1 20 val_20 +200 2 200 val_200 +200 2 200 val_200 +201 1 201 val_201 +202 1 202 val_202 +203 2 203 val_203 +203 2 203 val_203 +205 2 205 val_205 +205 2 205 val_205 +207 2 207 val_207 +207 2 207 val_207 +208 3 208 val_208 +208 3 208 val_208 +208 3 208 val_208 +209 2 209 val_209 209 2 209 val_209 -64 1 64 val_64 -468 4 468 val_468 -76 2 76 val_76 -74 1 74 val_74 -342 2 342 val_342 -69 1 69 val_69 -230 5 230 val_230 -33 1 33 val_33 -368 1 368 val_368 -103 2 103 val_103 -296 1 296 val_296 -113 2 113 val_113 +213 2 213 val_213 +213 2 213 val_213 +214 1 214 val_214 216 2 216 val_216 -367 2 367 val_367 -344 2 344 val_344 -167 3 167 val_167 -274 1 274 val_274 +216 2 216 val_216 +217 2 217 val_217 +217 2 217 val_217 +218 1 218 val_218 219 2 219 val_219 -239 2 239 val_239 -485 1 485 val_485 -116 1 116 val_116 +219 2 219 val_219 +221 2 221 val_221 +221 2 221 val_221 +222 1 222 val_222 223 2 223 val_223 +223 2 223 val_223 +224 2 224 val_224 +224 2 224 val_224 +226 1 226 val_226 +228 1 228 val_228 +229 2 229 val_229 +229 2 229 val_229 +230 5 230 val_230 +230 5 230 val_230 +230 5 230 val_230 +230 5 230 val_230 +230 5 230 val_230 +233 2 233 val_233 +233 2 233 val_233 +235 1 235 val_235 +237 2 237 val_237 +237 2 237 val_237 +238 2 238 val_238 +238 2 238 val_238 +239 2 239 val_239 +239 2 239 val_239 +24 2 24 val_24 +24 2 24 val_24 +241 1 241 val_241 +242 2 242 val_242 +242 2 242 val_242 +244 1 244 val_244 +247 1 247 val_247 +248 1 248 val_248 +249 1 249 val_249 +252 1 252 val_252 +255 2 255 val_255 +255 2 255 val_255 +256 2 256 val_256 256 2 256 val_256 +257 1 257 val_257 +258 1 258 val_258 +26 2 26 val_26 +26 
2 26 val_26 +260 1 260 val_260 +262 1 262 val_262 263 1 263 val_263 -70 3 70 val_70 -487 1 487 val_487 -480 3 480 val_480 -401 5 401 val_401 -288 2 288 val_288 -191 2 191 val_191 -5 3 5 val_5 -244 1 244 val_244 -438 3 438 val_438 -128 3 128 val_128 -467 1 467 val_467 -432 1 432 val_432 -202 1 202 val_202 -316 3 316 val_316 -229 2 229 val_229 -469 5 469 val_469 -463 2 463 val_463 +265 2 265 val_265 +265 2 265 val_265 +266 1 266 val_266 +27 1 27 val_27 +272 2 272 val_272 +272 2 272 val_272 +273 3 273 val_273 +273 3 273 val_273 +273 3 273 val_273 +274 1 274 val_274 +275 1 275 val_275 +277 4 277 val_277 +277 4 277 val_277 +277 4 277 val_277 +277 4 277 val_277 +278 2 278 val_278 +278 2 278 val_278 +28 1 28 val_28 280 2 280 val_280 -2 1 2 val_2 -35 3 35 val_35 +280 2 280 val_280 +281 2 281 val_281 +281 2 281 val_281 +282 2 282 val_282 +282 2 282 val_282 283 1 283 val_283 -331 2 331 val_331 -235 1 235 val_235 -80 1 80 val_80 -44 1 44 val_44 -193 3 193 val_193 +284 1 284 val_284 +285 1 285 val_285 +286 1 286 val_286 +287 1 287 val_287 +288 2 288 val_288 +288 2 288 val_288 +289 1 289 val_289 +291 1 291 val_291 +292 1 292 val_292 +296 1 296 val_296 +298 3 298 val_298 +298 3 298 val_298 +298 3 298 val_298 +30 1 30 val_30 +302 1 302 val_302 +305 1 305 val_305 +306 1 306 val_306 +307 2 307 val_307 +307 2 307 val_307 +308 1 308 val_308 +309 2 309 val_309 +309 2 309 val_309 +310 1 310 val_310 +311 3 311 val_311 +311 3 311 val_311 +311 3 311 val_311 +315 1 315 val_315 +316 3 316 val_316 +316 3 316 val_316 +316 3 316 val_316 +317 2 317 val_317 +317 2 317 val_317 +318 3 318 val_318 +318 3 318 val_318 +318 3 318 val_318 321 2 321 val_321 +321 2 321 val_321 +322 2 322 val_322 +322 2 322 val_322 +323 1 323 val_323 +325 2 325 val_325 +325 2 325 val_325 +327 3 327 val_327 +327 3 327 val_327 +327 3 327 val_327 +33 1 33 val_33 +331 2 331 val_331 +331 2 331 val_331 +332 1 332 val_332 +333 2 333 val_333 +333 2 333 val_333 335 1 335 val_335 -104 2 104 val_104 -466 3 466 val_466 +336 1 336 val_336 +338 1 338 val_338 +339 1 339 val_339 +34 1 34 val_34 +341 1 341 val_341 +342 2 342 val_342 +342 2 342 val_342 +344 2 344 val_344 +344 2 344 val_344 +345 1 345 val_345 +348 5 348 val_348 +348 5 348 val_348 +348 5 348 val_348 +348 5 348 val_348 +348 5 348 val_348 +35 3 35 val_35 +35 3 35 val_35 +35 3 35 val_35 +351 1 351 val_351 +353 2 353 val_353 +353 2 353 val_353 +356 1 356 val_356 +360 1 360 val_360 +362 1 362 val_362 +364 1 364 val_364 +365 1 365 val_365 366 1 366 val_366 -175 2 175 val_175 -403 3 403 val_403 -483 1 483 val_483 -53 1 53 val_53 -105 1 105 val_105 -257 1 257 val_257 -406 4 406 val_406 -409 3 409 val_409 -190 1 190 val_190 -406 4 406 val_406 +367 2 367 val_367 +367 2 367 val_367 +368 1 368 val_368 +369 3 369 val_369 +369 3 369 val_369 +369 3 369 val_369 +37 2 37 val_37 +37 2 37 val_37 +373 1 373 val_373 +374 1 374 val_374 +375 1 375 val_375 +377 1 377 val_377 +378 1 378 val_378 +379 1 379 val_379 +382 2 382 val_382 +382 2 382 val_382 +384 3 384 val_384 +384 3 384 val_384 +384 3 384 val_384 +386 1 386 val_386 +389 1 389 val_389 +392 1 392 val_392 +393 1 393 val_393 +394 1 394 val_394 +395 2 395 val_395 +395 2 395 val_395 +396 3 396 val_396 +396 3 396 val_396 +396 3 396 val_396 +397 2 397 val_397 +397 2 397 val_397 +399 2 399 val_399 +399 2 399 val_399 +4 1 4 val_4 +400 1 400 val_400 401 5 401 val_401 -114 1 114 val_114 -258 1 258 val_258 -90 3 90 val_90 -203 2 203 val_203 -262 1 262 val_262 -348 5 348 val_348 +401 5 401 val_401 +401 5 401 val_401 +401 5 401 val_401 +401 5 401 val_401 +402 1 402 val_402 +403 
3 403 val_403 +403 3 403 val_403 +403 3 403 val_403 +404 2 404 val_404 +404 2 404 val_404 +406 4 406 val_406 +406 4 406 val_406 +406 4 406 val_406 +406 4 406 val_406 +407 1 407 val_407 +409 3 409 val_409 +409 3 409 val_409 +409 3 409 val_409 +41 1 41 val_41 +411 1 411 val_411 +413 2 413 val_413 +413 2 413 val_413 +414 2 414 val_414 +414 2 414 val_414 +417 3 417 val_417 +417 3 417 val_417 +417 3 417 val_417 +418 1 418 val_418 +419 1 419 val_419 +42 2 42 val_42 +42 2 42 val_42 +421 1 421 val_421 424 2 424 val_424 -12 2 12 val_12 -396 3 396 val_396 -201 1 201 val_201 -217 2 217 val_217 -164 2 164 val_164 +424 2 424 val_424 +427 1 427 val_427 +429 2 429 val_429 +429 2 429 val_429 +43 1 43 val_43 +430 3 430 val_430 +430 3 430 val_430 +430 3 430 val_430 431 3 431 val_431 -454 3 454 val_454 -478 2 478 val_478 -298 3 298 val_298 -125 2 125 val_125 431 3 431 val_431 -164 2 164 val_164 -424 2 424 val_424 -187 3 187 val_187 -382 2 382 val_382 -5 3 5 val_5 -70 3 70 val_70 -397 2 397 val_397 -480 3 480 val_480 -291 1 291 val_291 -24 2 24 val_24 -351 1 351 val_351 -255 2 255 val_255 -104 2 104 val_104 -70 3 70 val_70 -163 1 163 val_163 +431 3 431 val_431 +432 1 432 val_432 +435 1 435 val_435 +436 1 436 val_436 +437 1 437 val_437 +438 3 438 val_438 +438 3 438 val_438 438 3 438 val_438 -119 3 119 val_119 -414 2 414 val_414 -200 2 200 val_200 -491 1 491 val_491 -237 2 237 val_237 439 2 439 val_439 -360 1 360 val_360 -248 1 248 val_248 -479 1 479 val_479 -305 1 305 val_305 -417 3 417 val_417 -199 3 199 val_199 -444 1 444 val_444 -120 2 120 val_120 -429 2 429 val_429 -169 4 169 val_169 +439 2 439 val_439 +44 1 44 val_44 443 1 443 val_443 -323 1 323 val_323 -325 2 325 val_325 -277 4 277 val_277 -230 5 230 val_230 -478 2 478 val_478 -178 1 178 val_178 -468 4 468 val_468 -310 1 310 val_310 -317 2 317 val_317 -333 2 333 val_333 -493 1 493 val_493 +444 1 444 val_444 +446 1 446 val_446 +448 1 448 val_448 +449 1 449 val_449 +452 1 452 val_452 +453 1 453 val_453 +454 3 454 val_454 +454 3 454 val_454 +454 3 454 val_454 +455 1 455 val_455 +457 1 457 val_457 +458 2 458 val_458 +458 2 458 val_458 +459 2 459 val_459 +459 2 459 val_459 460 1 460 val_460 -207 2 207 val_207 -249 1 249 val_249 -265 2 265 val_265 -480 3 480 val_480 -83 2 83 val_83 -136 1 136 val_136 -353 2 353 val_353 -172 2 172 val_172 -214 1 214 val_214 462 2 462 val_462 -233 2 233 val_233 -406 4 406 val_406 -133 1 133 val_133 -175 2 175 val_175 -189 1 189 val_189 -454 3 454 val_454 -375 1 375 val_375 -401 5 401 val_401 -421 1 421 val_421 -407 1 407 val_407 -384 3 384 val_384 -256 2 256 val_256 -26 2 26 val_26 -134 2 134 val_134 -67 2 67 val_67 -384 3 384 val_384 -379 1 379 val_379 -18 2 18 val_18 462 2 462 val_462 +463 2 463 val_463 +463 2 463 val_463 +466 3 466 val_466 +466 3 466 val_466 +466 3 466 val_466 +467 1 467 val_467 +468 4 468 val_468 +468 4 468 val_468 +468 4 468 val_468 +468 4 468 val_468 +469 5 469 val_469 +469 5 469 val_469 +469 5 469 val_469 +469 5 469 val_469 +469 5 469 val_469 +47 1 47 val_47 +470 1 470 val_470 +472 1 472 val_472 +475 1 475 val_475 +477 1 477 val_477 +478 2 478 val_478 +478 2 478 val_478 +479 1 479 val_479 +480 3 480 val_480 +480 3 480 val_480 +480 3 480 val_480 +481 1 481 val_481 +482 1 482 val_482 +483 1 483 val_483 +484 1 484 val_484 +485 1 485 val_485 +487 1 487 val_487 +489 4 489 val_489 +489 4 489 val_489 +489 4 489 val_489 +489 4 489 val_489 +490 1 490 val_490 +491 1 491 val_491 492 2 492 val_492 -100 2 100 val_100 -298 3 298 val_298 -9 1 9 val_9 -341 1 341 val_341 +492 2 492 val_492 +493 1 493 val_493 +494 1 494 
val_494 +495 1 495 val_495 +496 1 496 val_496 +497 1 497 val_497 498 3 498 val_498 -146 2 146 val_146 -458 2 458 val_458 -362 1 362 val_362 -186 1 186 val_186 -285 1 285 val_285 -348 5 348 val_348 -167 3 167 val_167 -18 2 18 val_18 -273 3 273 val_273 -183 1 183 val_183 -281 2 281 val_281 -344 2 344 val_344 -97 2 97 val_97 -469 5 469 val_469 -315 1 315 val_315 +498 3 498 val_498 +498 3 498 val_498 +5 3 5 val_5 +5 3 5 val_5 +5 3 5 val_5 +51 2 51 val_51 +51 2 51 val_51 +53 1 53 val_53 +54 1 54 val_54 +57 1 57 val_57 +58 2 58 val_58 +58 2 58 val_58 +64 1 64 val_64 +65 1 65 val_65 +66 1 66 val_66 +67 2 67 val_67 +67 2 67 val_67 +69 1 69 val_69 +70 3 70 val_70 +70 3 70 val_70 +70 3 70 val_70 +72 2 72 val_72 +72 2 72 val_72 +74 1 74 val_74 +76 2 76 val_76 +76 2 76 val_76 +77 1 77 val_77 +78 1 78 val_78 +8 1 8 val_8 +80 1 80 val_80 +82 1 82 val_82 +83 2 83 val_83 +83 2 83 val_83 84 2 84 val_84 -28 1 28 val_28 -37 2 37 val_37 -448 1 448 val_448 -152 2 152 val_152 -348 5 348 val_348 -307 2 307 val_307 -194 1 194 val_194 -414 2 414 val_414 -477 1 477 val_477 -222 1 222 val_222 -126 1 126 val_126 +84 2 84 val_84 +85 1 85 val_85 +86 1 86 val_86 +87 1 87 val_87 +9 1 9 val_9 90 3 90 val_90 -169 4 169 val_169 -403 3 403 val_403 -400 1 400 val_400 -200 2 200 val_200 +90 3 90 val_90 +90 3 90 val_90 +92 1 92 val_92 +95 2 95 val_95 +95 2 95 val_95 +96 1 96 val_96 +97 2 97 val_97 97 2 97 val_97 +98 2 98 val_98 +98 2 98 val_98 diff --git a/ql/src/test/results/clientpositive/tez/optimize_nullscan.q.out b/ql/src/test/results/clientpositive/tez/optimize_nullscan.q.out index c45f0db..741eb4b 100644 --- a/ql/src/test/results/clientpositive/tez/optimize_nullscan.q.out +++ b/ql/src/test/results/clientpositive/tez/optimize_nullscan.q.out @@ -654,6 +654,76 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: false (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + aggregations: count(key) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Path -> Alias: + -mr-10002default.src{} [src] + Path -> Partition: + -mr-10002default.src{} + Partition + base file name: src + input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.NullStructSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.NullStructSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name 
default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + -mr-10002default.src{} [src] + Map 4 + Map Operator Tree: + TableScan alias: srcpart Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE GatherStats: false @@ -864,80 +934,6 @@ STAGE PLANS: /srcpart/ds=2008-04-08/hr=12 [srcpart] /srcpart/ds=2008-04-09/hr=11 [srcpart] /srcpart/ds=2008-04-09/hr=12 [srcpart] - Map 4 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: false (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count(key) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Path -> Alias: - -mr-10002default.src{} [src] - Path -> Partition: - -mr-10002default.src{} - Partition - base file name: src - input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments defaultdefault - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.NullStructSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.NullStructSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments defaultdefault - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated Path -> Alias: - -mr-10002default.src{} [src] Reducer 2 Needs Tagging: false Reduce Operator Tree: @@ -948,29 +944,26 @@ STAGE PLANS: Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: bigint) - outputColumnNames: _col0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 -#### A masked pattern was here #### - table: - input format: 
org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types bigint - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Reducer 5 Needs Tagging: false Reduce Operator Tree: @@ -981,29 +974,26 @@ STAGE PLANS: Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: bigint) - outputColumnNames: _col0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types bigint - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Union 3 Vertex: Union 3 @@ -1031,8 +1021,8 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### -2000 0 +2000 Warning: Shuffle Join MERGEJOIN[15][tables = [a, b]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain extended select * from (select key from src where false) a left outer join (select value from srcpart limit 0) b @@ -1535,12 +1525,11 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 4 <- Union 3 (CONTAINS) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE), Union 3 (CONTAINS) + Map 1 <- Union 2 (CONTAINS) + Reducer 4 <- Map 3 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE), Union 2 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 - Map 4 Map Operator Tree: TableScan alias: src @@ -1623,7 +1612,7 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src 
[src] - Map 5 + Map 3 Map Operator Tree: TableScan alias: src @@ -1689,7 +1678,8 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: -mr-10002default.src{} [src] - Reducer 2 + Map 5 + Reducer 4 Needs Tagging: false Reduce Operator Tree: Merge Join Operator @@ -1703,31 +1693,28 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Union 3 - Vertex: Union 3 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Union 2 + Vertex: Union 2 Stage: Stage-0 Fetch Operator @@ -1799,7 +1786,7 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: s2 + alias: s1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator @@ -1811,13 +1798,13 @@ STAGE PLANS: sort order: + Map-reduce partition columns: value (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - tag: 1 + tag: 0 value expressions: key (type: string) auto parallelism: true Path -> Alias: - -mr-10003default.src{} [s2] + -mr-10002default.src{} [s1] Path -> Partition: - -mr-10003default.src{} + -mr-10002default.src{} Partition base file name: src input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat @@ -1862,11 +1849,11 @@ STAGE PLANS: name: default.src name: default.src Truncated Path -> Alias: - -mr-10003default.src{} [s2] + -mr-10002default.src{} [s1] Map 3 Map Operator Tree: TableScan - alias: s1 + alias: s2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator @@ -1878,13 +1865,13 @@ STAGE PLANS: sort order: + Map-reduce partition columns: value (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - tag: 0 + tag: 1 value expressions: key (type: string) auto parallelism: true Path -> Alias: - -mr-10002default.src{} [s1] + -mr-10003default.src{} [s2] Path -> Partition: - -mr-10002default.src{} + -mr-10003default.src{} Partition base file name: src input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat @@ -1929,7 +1916,7 @@ STAGE PLANS: name: default.src name: default.src Truncated Path -> Alias: - -mr-10002default.src{} [s1] + -mr-10003default.src{} [s2] Reducer 2 Needs Tagging: false Reduce 
Operator Tree: diff --git a/ql/src/test/results/clientpositive/tez/parallel.q.out b/ql/src/test/results/clientpositive/tez/parallel.q.out index a1ccd76..8d1748d 100644 --- a/ql/src/test/results/clientpositive/tez/parallel.q.out +++ b/ql/src/test/results/clientpositive/tez/parallel.q.out @@ -69,54 +69,42 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: Forward - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_a + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_a Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_b + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column 
stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_b Stage: Stage-3 Dependency Collection diff --git a/ql/src/test/results/clientpositive/tez/script_pipe.q.out b/ql/src/test/results/clientpositive/tez/script_pipe.q.out index 89c1af7..fb95cbc 100644 --- a/ql/src/test/results/clientpositive/tez/script_pipe.q.out +++ b/ql/src/test/results/clientpositive/tez/script_pipe.q.out @@ -40,24 +40,20 @@ STAGE PLANS: Limit Number of rows: 1 Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + Transform Operator + command: true + output info: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - Transform Operator - command: true - output info: + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/tez/skewjoin.q.out b/ql/src/test/results/clientpositive/tez/skewjoin.q.out index e42326b..7db7bce 100644 --- a/ql/src/test/results/clientpositive/tez/skewjoin.q.out +++ b/ql/src/test/results/clientpositive/tez/skewjoin.q.out @@ -94,7 +94,7 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -104,11 +104,10 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) Map 3 Map Operator Tree: TableScan - alias: src1 + alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -118,6 +117,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Reducer 2 Reduce Operator Tree: Merge Join Operator @@ -204,7 +204,7 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: d + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -246,7 +246,7 @@ STAGE PLANS: Map 5 Map Operator 
Tree: TableScan - alias: a + alias: d Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -336,7 +336,7 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: d + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -378,7 +378,7 @@ STAGE PLANS: Map 5 Map Operator Tree: TableScan - alias: a + alias: d Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -461,20 +461,6 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key + 1) is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: (key + 1) (type: double) - sort order: + - Map-reduce partition columns: (key + 1) (type: double) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: string) - Map 4 - Map Operator Tree: - TableScan alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator @@ -486,6 +472,20 @@ STAGE PLANS: Map-reduce partition columns: UDFToDouble(key) (type: double) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: key (type: string), val (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key + 1) is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: (key + 1) (type: double) + sort order: + + Map-reduce partition columns: (key + 1) (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) Reducer 2 Reduce Operator Tree: Merge Join Operator @@ -496,19 +496,15 @@ STAGE PLANS: 1 {VALUE._col0} outputColumnNames: _col0, _col1, _col5 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) - outputColumnNames: _col0, _col1, _col5 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col5)) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col5)) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint) + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -616,19 +612,15 @@ STAGE PLANS: 1 {KEY.reducesinkkey0} {VALUE._col0} outputColumnNames: _col2, _col3 Statistics: Num rows: 275 
Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col3 (type: string) - outputColumnNames: _col2, _col3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2)), sum(hash(_col3)) - mode: hash - outputColumnNames: _col0, _col1 + Group By Operator + aggregations: sum(hash(_col2)), sum(hash(_col3)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint) + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -744,19 +736,15 @@ STAGE PLANS: 1 {KEY.reducesinkkey0} {VALUE._col0} outputColumnNames: _col2, _col3 Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col3 (type: string) - outputColumnNames: _col2, _col3 - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2)), sum(hash(_col3)) - mode: hash - outputColumnNames: _col0, _col1 + Group By Operator + aggregations: sum(hash(_col2)), sum(hash(_col3)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint) + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -841,35 +829,35 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((key < 100) and (key < 80)) and key is not null) (type: boolean) - Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + predicate: ((key < 80) and (key < 100)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Map 4 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key < 80) and (key < 100)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + predicate: (((key < 100) and (key < 80)) and key 
is not null) (type: boolean) + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Map 5 Map Operator Tree: TableScan @@ -899,19 +887,15 @@ STAGE PLANS: 2 outputColumnNames: _col0, _col3 Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col3 - Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col3)) - mode: hash - outputColumnNames: _col0, _col1 + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col3)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint) + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -983,25 +967,25 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: v + alias: k Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: UDFToDouble(key) (type: double) + key expressions: (key + 1) (type: double) sort order: + - Map-reduce partition columns: UDFToDouble(key) (type: double) + Map-reduce partition columns: (key + 1) (type: double) Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - value expressions: val (type: string) + value expressions: key (type: string) Map 4 Map Operator Tree: TableScan - alias: k + alias: v Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: (key + 1) (type: double) + key expressions: UDFToDouble(key) (type: double) sort order: + - Map-reduce partition columns: (key + 1) (type: double) + Map-reduce partition columns: UDFToDouble(key) (type: double) Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - value expressions: key (type: string) + value expressions: val (type: string) Reducer 2 Reduce Operator Tree: Merge Join Operator @@ -1012,19 +996,15 @@ STAGE PLANS: 1 {VALUE._col1} outputColumnNames: _col0, _col6 Statistics: Num rows: 0 Data size: 33 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col6 - Statistics: Num rows: 0 Data size: 33 Basic stats: PARTIAL Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col6)) - mode: hash - outputColumnNames: _col0, _col1 + Group By 
Operator + aggregations: sum(hash(_col0)), sum(hash(_col6)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint) + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/tez/subquery_in.q.out b/ql/src/test/results/clientpositive/tez/subquery_in.q.out index b8043dd..51d7d1c 100644 --- a/ql/src/test/results/clientpositive/tez/subquery_in.q.out +++ b/ql/src/test/results/clientpositive/tez/subquery_in.q.out @@ -28,6 +28,20 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan alias: s1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -47,20 +61,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Map 3 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) Reducer 2 Reduce Operator Tree: Merge Join Operator @@ -265,9 +265,9 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Map 5 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -275,17 +275,6 @@ STAGE PLANS: TableScan alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_mfgr (type: string), p_size (type: int) - sort order: ++ - Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: p_mfgr (type: string), p_size (type: int) - Map 5 - Map Operator Tree: - TableScan - alias: part - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: UDFToDouble(p_size) is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE @@ -295,8 +284,40 @@ STAGE PLANS: Map-reduce 
partition columns: UDFToDouble(p_size) (type: double) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE value expressions: p_name (type: string), p_size (type: int) + Map 3 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_size (type: int) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_mfgr (type: string), p_size (type: int) Reducer 2 Reduce Operator Tree: + Merge Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {VALUE._col1} {VALUE._col5} + 1 + outputColumnNames: _col1, _col5 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col5 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: Extract Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE PTF Operator @@ -317,7 +338,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: struct) - Reducer 3 + Reducer 5 Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0) @@ -327,41 +348,16 @@ STAGE PLANS: Filter Operator predicate: _col0 is not null (type: boolean) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: double) + Group By Operator + keys: _col0 (type: double) + mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: double) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reducer 4 - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Semi Join 0 to 1 - condition expressions: - 0 {VALUE._col1} {VALUE._col5} - 1 - outputColumnNames: _col1, _col5 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col5 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 
Fetch Operator @@ -602,20 +598,16 @@ STAGE PLANS: Filter Operator predicate: ((key > '9') and value is not null) (type: boolean) Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + Group By Operator + keys: key (type: string), value (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: key (type: string), value (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Merge Join Operator @@ -644,20 +636,16 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -740,14 +728,32 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) - Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) - Reducer 5 <- Map 4 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Map 6 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: lineitem + Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: l_partkey is not null (type: boolean) + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: l_partkey (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan alias: li Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Filter 
Operator @@ -759,28 +765,6 @@ STAGE PLANS: Map-reduce partition columns: l_partkey (type: int) Statistics: Num rows: 12 Data size: 1439 Basic stats: COMPLETE Column stats: NONE value expressions: l_orderkey (type: int), l_suppkey (type: int) - Map 4 - Map Operator Tree: - TableScan - alias: lineitem - Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: l_partkey is not null (type: boolean) - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: l_partkey (type: int) - outputColumnNames: l_partkey - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: l_partkey (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Map 6 Map Operator Tree: TableScan @@ -805,6 +789,18 @@ STAGE PLANS: Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: Merge Join Operator condition map: Inner Join 0 to 1 @@ -812,14 +808,14 @@ STAGE PLANS: 0 {KEY.reducesinkkey0} 1 {VALUE._col0} {VALUE._col1} outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 27 Data size: 3298 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 27 Data size: 3298 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col3 (type: int) - Reducer 3 + Reducer 4 Reduce Operator Tree: Merge Join Operator condition map: @@ -828,34 +824,18 @@ STAGE PLANS: 0 {VALUE._col0} {VALUE._col2} 1 outputColumnNames: _col0, _col3 - Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col3 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - 
Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/tez/temp_table.q.out b/ql/src/test/results/clientpositive/tez/temp_table.q.out index 42dcca5..26f068c 100644 --- a/ql/src/test/results/clientpositive/tez/temp_table.q.out +++ b/ql/src/test/results/clientpositive/tez/temp_table.q.out @@ -213,31 +213,25 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: bar + alias: foo Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - value expressions: _col1 (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + value expressions: _col1 (type: string) Map 4 Map Operator Tree: TableScan - alias: foo + alias: bar Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - value expressions: _col1 (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + value expressions: _col1 (type: string) Reducer 3 Reduce Operator Tree: Select Operator diff --git a/ql/src/test/results/clientpositive/tez/tez_bmj_schema_evolution.q.out b/ql/src/test/results/clientpositive/tez/tez_bmj_schema_evolution.q.out index aec18a8..e628e2a 100644 --- a/ql/src/test/results/clientpositive/tez/tez_bmj_schema_evolution.q.out +++ b/ql/src/test/results/clientpositive/tez/tez_bmj_schema_evolution.q.out @@ -78,26 +78,13 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 2 <- Map 1 (CUSTOM_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Map 1 <- Map 3 (CUSTOM_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: test1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map 2 - Map Operator Tree: - TableScan alias: test Statistics: Num rows: 1000 Data size: 52312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -114,18 +101,27 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1 input vertices: - 1 Map 1 + 1 Map 3 Statistics: Num rows: 550 Data size: 28771 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + Statistics: Num rows: 550 Data size: 28771 Basic stats: 
COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 550 Data size: 28771 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reducer 3 + value expressions: _col1 (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: test1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reducer 2 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) diff --git a/ql/src/test/results/clientpositive/tez/tez_dml.q.out b/ql/src/test/results/clientpositive/tez/tez_dml.q.out index 9b7f564..48223f4 100644 --- a/ql/src/test/results/clientpositive/tez/tez_dml.q.out +++ b/ql/src/test/results/clientpositive/tez/tez_dml.q.out @@ -47,25 +47,21 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: bigint) - sort order: + - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: bigint) + sort order: + + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Reducer 3 Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git a/ql/src/test/results/clientpositive/tez/tez_join_hash.q.out b/ql/src/test/results/clientpositive/tez/tez_join_hash.q.out index e698d72..05f39c1 100644 --- a/ql/src/test/results/clientpositive/tez/tez_join_hash.q.out +++ b/ql/src/test/results/clientpositive/tez/tez_join_hash.q.out @@ -37,30 +37,30 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: orc_src - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output 
Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Map 4 Map Operator Tree: TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: orc_src + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Reducer 2 Reduce Operator Tree: Merge Join Operator diff --git a/ql/src/test/results/clientpositive/tez/tez_join_tests.q.out b/ql/src/test/results/clientpositive/tez/tez_join_tests.q.out index 004b60c..66d8798 100644 --- a/ql/src/test/results/clientpositive/tez/tez_join_tests.q.out +++ b/ql/src/test/results/clientpositive/tez/tez_join_tests.q.out @@ -16,15 +16,25 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Map 6 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) + Reducer 4 <- Map 7 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Map 6 + Map Operator Tree: + TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -33,7 +43,7 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) - Map 6 + Map 7 Map Operator Tree: TableScan alias: c @@ -44,16 +54,6 @@ STAGE PLANS: Map-reduce partition columns: value (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: string) - Map 7 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Merge Join Operator @@ -95,15 +95,11 @@ STAGE PLANS: 1 {VALUE._col0} {KEY.reducesinkkey0} outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - outputColumnNames: 
_col0, _col1, _col2, _col3 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) Reducer 5 Reduce Operator Tree: Select Operator diff --git a/ql/src/test/results/clientpositive/tez/tez_joins_explain.q.out b/ql/src/test/results/clientpositive/tez/tez_joins_explain.q.out index 9820eff..dfd0a6f 100644 --- a/ql/src/test/results/clientpositive/tez/tez_joins_explain.q.out +++ b/ql/src/test/results/clientpositive/tez/tez_joins_explain.q.out @@ -16,15 +16,25 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Map 6 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) + Reducer 4 <- Map 7 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Map 6 + Map Operator Tree: + TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -33,7 +43,7 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) - Map 6 + Map 7 Map Operator Tree: TableScan alias: c @@ -44,16 +54,6 @@ STAGE PLANS: Map-reduce partition columns: value (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: string) - Map 7 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Merge Join Operator @@ -95,15 +95,11 @@ STAGE PLANS: 1 {VALUE._col0} {KEY.reducesinkkey0} outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) Reducer 5 Reduce Operator 
Tree: Select Operator diff --git a/ql/src/test/results/clientpositive/tez/tez_smb_1.q.out b/ql/src/test/results/clientpositive/tez/tez_smb_1.q.out index e65ef01..17111ab 100644 --- a/ql/src/test/results/clientpositive/tez/tez_smb_1.q.out +++ b/ql/src/test/results/clientpositive/tez/tez_smb_1.q.out @@ -150,17 +150,15 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/tez/tez_smb_main.q.out b/ql/src/test/results/clientpositive/tez/tez_smb_main.q.out index 1f4e305..6db7150 100644 --- a/ql/src/test/results/clientpositive/tez/tez_smb_main.q.out +++ b/ql/src/test/results/clientpositive/tez/tez_smb_main.q.out @@ -18,30 +18,30 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Map 3 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Reducer 2 Reduce Operator Tree: @@ -244,13 +244,26 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 1 <- Map 3 (CUSTOM_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Map 2 <- Map 1 (CUSTOM_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 121 Data 
size: 1283 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + Map 2 + Map Operator Tree: + TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -266,33 +279,18 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) input vertices: - 0 Map 3 + 0 Map 1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + value expressions: _col0 (type: bigint) + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -350,13 +348,26 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 1 <- Map 3 (CUSTOM_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Map 2 <- Map 1 (CUSTOM_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + Map 2 + Map Operator Tree: + TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -372,33 +383,18 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) input vertices: - 0 Map 3 + 0 Map 1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: 
bigint) - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + value expressions: _col0 (type: bigint) + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -456,13 +452,26 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 1 <- Map 3 (CUSTOM_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Map 2 <- Map 1 (CUSTOM_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + Map 2 + Map Operator Tree: + TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -478,33 +487,18 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) input vertices: - 0 Map 3 + 0 Map 1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + value expressions: _col0 (type: bigint) + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -558,13 +552,27 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (CUSTOM_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Map 2 <- Map 1 (CUSTOM_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 61 Data size: 646 Basic stats: COMPLETE Column stats: 
NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 61 Data size: 646 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 2 + Map Operator Tree: + TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -581,7 +589,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col1 input vertices: - 0 Map 4 + 0 Map 1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -593,20 +601,18 @@ STAGE PLANS: 0 _col1 (type: string) 1 value (type: string) input vertices: - 1 Map 3 + 1 Map 4 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Map 3 + value expressions: _col0 (type: bigint) + Map 4 Map Operator Tree: TableScan alias: c @@ -619,21 +625,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: value (type: string) Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Map 4 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 61 Data size: 646 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 61 Data size: 646 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -694,29 +686,29 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: value is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: value (type: string) sort order: + Map-reduce partition columns: value (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE Map 4 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: value is not null (type: boolean) - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + 
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: value (type: string) sort order: + Map-reduce partition columns: value (type: string) - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Merge Join Operator @@ -726,17 +718,15 @@ STAGE PLANS: 0 1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -797,26 +787,13 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 5 <- Union 6 (CONTAINS) - Map 7 <- Union 6 (CONTAINS) - Reducer 3 <- Map 2 (SIMPLE_EDGE), Union 6 (SIMPLE_EDGE) + Map 1 <- Union 2 (CONTAINS) + Map 6 <- Union 2 (CONTAINS) + Reducer 3 <- Map 7 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: - Map 2 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map 5 + Map 1 Map Operator Tree: TableScan alias: s3 @@ -839,14 +816,11 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Map 7 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Map 6 Map Operator Tree: TableScan alias: s2 @@ -859,6 +833,19 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Map 7 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: Merge Join Operator @@ -868,17 +855,15 @@ STAGE PLANS: 0 1 Statistics: Num rows: 279 Data size: 2963 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 279 Data size: 2963 Basic 
stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Reducer 4 Reduce Operator Tree: Group By Operator @@ -897,8 +882,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 6 - Vertex: Union 6 + Union 2 + Vertex: Union 2 Stage: Stage-0 Fetch Operator @@ -918,13 +903,26 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + Map 2 + Map Operator Tree: + TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -940,33 +938,18 @@ STAGE PLANS: 0 value (type: string) 1 value (type: string) input vertices: - 0 Map 3 + 0 Map 1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + value expressions: _col0 (type: bigint) + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1018,13 +1001,27 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (CUSTOM_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Map 2 <- Map 1 (CUSTOM_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 3 <- Map 
2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 61 Data size: 646 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 61 Data size: 646 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 2 + Map Operator Tree: + TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -1041,7 +1038,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col1 input vertices: - 0 Map 4 + 0 Map 1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -1053,20 +1050,18 @@ STAGE PLANS: 0 _col1 (type: string) 1 value (type: string) input vertices: - 1 Map 3 + 1 Map 4 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Map 3 + value expressions: _col0 (type: bigint) + Map 4 Map Operator Tree: TableScan alias: c @@ -1079,21 +1074,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: value (type: string) Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Map 4 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 61 Data size: 646 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 61 Data size: 646 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reducer 2 + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1155,45 +1136,14 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 3 <- Map 1 (CUSTOM_EDGE), Map 2 (BROADCAST_EDGE), Union 4 (CONTAINS) - Map 6 <- Map 2 (BROADCAST_EDGE), Union 4 (CONTAINS) - Reducer 5 <- Union 4 (SIMPLE_EDGE) + Map 1 <- Map 4 (CUSTOM_EDGE), Map 6 (BROADCAST_EDGE), Union 2 (CONTAINS) + Map 5 <- Map 6 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: s3 - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - 
Map-reduce partition columns: key (type: int) - Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Map 2 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map 3 - Map Operator Tree: - TableScan alias: s1 Filter Operator predicate: key is not null (type: boolean) @@ -1208,30 +1158,39 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0 input vertices: - 1 Map 1 - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - keys: - 0 _col0 (type: int) - 1 key (type: int) - input vertices: - 1 Map 2 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) - Map 6 + 1 Map 4 + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 _col0 (type: int) + 1 key (type: int) + input vertices: + 1 Map 6 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Map 4 + Map Operator Tree: + TableScan + alias: s3 + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + Map 5 Map Operator Tree: TableScan alias: s2 @@ -1250,16 +1209,33 @@ STAGE PLANS: 0 _col0 (type: int) 1 key (type: int) input vertices: - 1 Map 2 - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) - Reducer 5 + 1 Map 6 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Map 6 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + 
Reducer 3 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1277,8 +1253,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 4 - Vertex: Union 4 + Union 2 + Vertex: Union 2 Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/tez/tez_union.q.out b/ql/src/test/results/clientpositive/tez/tez_union.q.out index e1ea884..8ac475e 100644 --- a/ql/src/test/results/clientpositive/tez/tez_union.q.out +++ b/ql/src/test/results/clientpositive/tez/tez_union.q.out @@ -41,15 +41,12 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Map 3 Map Operator Tree: TableScan @@ -159,10 +156,10 @@ STAGE PLANS: Tez Edges: Map 1 <- Union 2 (CONTAINS) - Map 5 <- Union 6 (CONTAINS) - Map 7 <- Union 2 (CONTAINS) - Map 8 <- Union 6 (CONTAINS) - Reducer 3 <- Union 2 (SIMPLE_EDGE), Union 6 (SIMPLE_EDGE) + Map 5 <- Union 2 (CONTAINS) + Map 6 <- Union 7 (CONTAINS) + Map 8 <- Union 7 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE), Union 7 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: @@ -192,7 +189,7 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Map 7 + Map 6 Map Operator Tree: TableScan alias: src @@ -227,17 +224,15 @@ STAGE PLANS: 0 1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Reducer 4 Reduce Operator Tree: Group By Operator @@ -258,8 +253,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 2 Vertex: Union 2 - Union 6 - Vertex: Union 6 + Union 7 + Vertex: Union 7 Stage: Stage-0 Fetch Operator @@ -316,31 +311,13 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 2 <- Map 1 (BROADCAST_EDGE), Union 3 (CONTAINS) - Map 4 <- Map 1 (BROADCAST_EDGE), Union 3 (CONTAINS) + Map 1 <- Map 4 (BROADCAST_EDGE), Union 2 (CONTAINS) + Map 3 <- Map 4 (BROADCAST_EDGE), Union 2 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: s1 - Statistics: Num rows: 500 Data size: 5312 Basic 
stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map 2 - Map Operator Tree: - TableScan alias: src Filter Operator predicate: key is not null (type: boolean) @@ -358,7 +335,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col5 input vertices: - 0 Map 1 + 0 Map 4 Select Operator expressions: _col0 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1 @@ -368,7 +345,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Map 4 + Map 3 Map Operator Tree: TableScan alias: src @@ -388,7 +365,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col5 input vertices: - 0 Map 1 + 0 Map 4 Select Operator expressions: _col0 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1 @@ -398,8 +375,26 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 3 - Vertex: Union 3 + Map 4 + Map Operator Tree: + TableScan + alias: s1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Union 2 + Vertex: Union 2 Stage: Stage-0 Fetch Operator @@ -483,16 +478,57 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 2 <- Map 1 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE), Union 3 (CONTAINS) - Map 7 <- Map 1 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE), Union 3 (CONTAINS) - Map 9 <- Map 1 (BROADCAST_EDGE), Map 10 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE), Union 3 (CONTAINS) - Reducer 4 <- Union 3 (SIMPLE_EDGE) + Map 1 <- Map 10 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE), Map 9 (BROADCAST_EDGE), Union 2 (CONTAINS) + Map 5 <- Map 10 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE), Map 9 (BROADCAST_EDGE), Union 2 (CONTAINS) + Map 7 <- Map 10 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE), Map 9 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: s1 + alias: s2 + Filter Operator + predicate: key is not null (type: boolean) + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 + keys: + 0 key (type: string) + 1 key (type: string) + 
outputColumnNames: _col0 + input vertices: + 1 Map 4 + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {key} + 1 {_col0} + 2 {key} + keys: + 0 key (type: string) + 1 _col0 (type: string) + 2 key (type: string) + outputColumnNames: _col0, _col5, _col6 + input vertices: + 0 Map 9 + 2 Map 10 + Select Operator + expressions: _col0 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + value expressions: _col0 (type: string), _col1 (type: string) + Map 10 + Map Operator Tree: + TableScan + alias: s8 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -512,10 +548,10 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map 10 + Map 4 Map Operator Tree: TableScan - alias: s5 + alias: s3 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -525,10 +561,10 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map 2 + Map 5 Map Operator Tree: TableScan - alias: s2 + alias: s4 Filter Operator predicate: key is not null (type: boolean) Map Join Operator @@ -542,50 +578,34 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0 input vertices: - 1 Map 5 - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - condition expressions: - 0 {key} - 1 {_col0} - 2 {key} - keys: - 0 key (type: string) - 1 _col0 (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col5, _col6 - input vertices: - 0 Map 1 - 2 Map 8 - Select Operator - expressions: _col0 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2 - Reduce Output Operator - key expressions: _col2 (type: string) - sort order: + - value expressions: _col0 (type: string), _col1 (type: string) - Map 5 - Map Operator Tree: - TableScan - alias: s3 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + 1 Map 6 + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {key} + 1 {_col0} + 2 {key} + keys: + 0 key (type: string) + 1 _col0 (type: string) + 2 key (type: string) + outputColumnNames: _col0, _col5, _col6 + input vertices: + 0 Map 9 + 2 Map 10 + Select Operator + expressions: _col0 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + value expressions: _col0 (type: string), _col1 (type: string) Map 6 Map Operator Tree: TableScan - alias: s7 + alias: s5 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter 
Operator predicate: key is not null (type: boolean) @@ -612,37 +632,34 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0 input vertices: - 1 Map 6 - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - condition expressions: - 0 {key} - 1 {_col0} - 2 {key} - keys: - 0 key (type: string) - 1 _col0 (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col5, _col6 - input vertices: - 0 Map 1 - 2 Map 8 - Select Operator - expressions: _col0 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2 - Reduce Output Operator - key expressions: _col2 (type: string) - sort order: + - value expressions: _col0 (type: string), _col1 (type: string) + 1 Map 8 + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {key} + 1 {_col0} + 2 {key} + keys: + 0 key (type: string) + 1 _col0 (type: string) + 2 key (type: string) + outputColumnNames: _col0, _col5, _col6 + input vertices: + 0 Map 9 + 2 Map 10 + Select Operator + expressions: _col0 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + value expressions: _col0 (type: string), _col1 (type: string) Map 8 Map Operator Tree: TableScan - alias: s8 + alias: s7 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -652,6 +669,14 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map 9 + Map Operator Tree: + TableScan + alias: s1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + @@ -662,51 +687,12 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map 9 - Map Operator Tree: - TableScan - alias: s4 - Filter Operator - predicate: key is not null (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} - 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0 - input vertices: - 1 Map 10 - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - condition expressions: - 0 {key} - 1 {_col0} - 2 {key} - keys: - 0 key (type: string) - 1 _col0 (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col5, _col6 - input vertices: - 0 Map 1 - 2 Map 8 - Select Operator - expressions: _col0 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2 - Reduce Output Operator - key expressions: _col2 (type: string) - sort order: + - value expressions: _col0 (type: string), _col1 (type: string) - Reducer 4 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reducer 3 Reduce 
Operator Tree: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string) @@ -719,8 +705,8 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 3 - Vertex: Union 3 + Union 2 + Vertex: Union 2 Stage: Stage-0 Fetch Operator @@ -909,15 +895,12 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Map 3 Map Operator Tree: TableScan @@ -952,15 +935,12 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Map 5 Map Operator Tree: TableScan @@ -1061,8 +1041,8 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Union 2 (CONTAINS) - Map 4 <- Map 3 (BROADCAST_EDGE), Union 2 (CONTAINS) + Map 1 <- Map 4 (BROADCAST_EDGE), Union 2 (CONTAINS) + Map 3 <- Map 4 (BROADCAST_EDGE), Union 2 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 @@ -1083,7 +1063,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col1, _col2, _col3 input vertices: - 1 Map 3 + 1 Map 4 Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 @@ -1096,23 +1076,6 @@ STAGE PLANS: Map 3 Map Operator Tree: TableScan - alias: s - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) - Map 4 - Map Operator Tree: - TableScan alias: src Select Operator expressions: key (type: string), value (type: string) @@ -1128,7 +1091,7 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col1, _col2, _col3 input vertices: - 1 Map 3 + 1 Map 4 Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: 
string) outputColumnNames: _col0, _col1, _col2, _col3 @@ -1138,6 +1101,23 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Union 2 Vertex: Union 2 @@ -1166,8 +1146,8 @@ STAGE PLANS: Tez Edges: Map 1 <- Union 2 (CONTAINS) - Map 3 <- Union 2 (BROADCAST_EDGE) - Map 4 <- Union 2 (CONTAINS) + Map 3 <- Union 2 (CONTAINS) + Map 4 <- Union 2 (BROADCAST_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1184,6 +1164,17 @@ STAGE PLANS: Map 3 Map Operator Tree: TableScan + alias: src + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Map 4 + Map Operator Tree: + TableScan alias: s Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -1210,17 +1201,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Map 4 - Map Operator Tree: - TableScan - alias: src - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) Union 2 Vertex: Union 2 diff --git a/ql/src/test/results/clientpositive/tez/tez_union_group_by.q.out b/ql/src/test/results/clientpositive/tez/tez_union_group_by.q.out index f5bad7f..256ec35 100644 --- a/ql/src/test/results/clientpositive/tez/tez_union_group_by.q.out +++ b/ql/src/test/results/clientpositive/tez/tez_union_group_by.q.out @@ -146,16 +146,17 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 10 <- Union 7 (CONTAINS) - Map 6 <- Union 7 (CONTAINS) - Map 9 <- Union 7 (CONTAINS) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) - Reducer 8 <- Union 7 (SIMPLE_EDGE) + Map 1 <- Union 2 (CONTAINS) + Map 5 <- Union 2 (CONTAINS) + Map 6 <- Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) + Reducer 8 <- Map 10 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) + Reducer 9 <- Reducer 8 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 + Map 10 Map Operator Tree: TableScan alias: v @@ -168,28 +169,22 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: t (type: string), st (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map 10 Map 5 Map 6 - Map 9 - Reducer 2 + Map 7 + Reducer 3 Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - 
condition expressions: - 0 {VALUE._col0} - 1 - nullSafes: [false, true] - outputColumnNames: _col0 + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: bigint) - outputColumnNames: _col0 + Filter Operator + predicate: (_col1 <= '2014-09-02') (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - keys: _col0 (type: bigint) - mode: hash + Select Operator + expressions: _col0 (type: bigint) outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator @@ -197,22 +192,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: bigint) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reducer 3 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: bigint) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reducer 4 Reduce Operator Tree: Merge Join Operator @@ -236,26 +215,39 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 8 Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 + nullSafes: [false, true] + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col0 (type: bigint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reducer 9 + Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0) keys: KEY._col0 (type: bigint) mode: mergepartial - outputColumnNames: _col0, _col1 + outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Filter Operator - predicate: (_col1 <= '2014-09-02') (type: boolean) + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Union 7 - Vertex: Union 7 + Union 2 + Vertex: Union 2 Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/tez/union3.q.out b/ql/src/test/results/clientpositive/tez/union3.q.out index ccb8052..d4b5119 100644 --- a/ql/src/test/results/clientpositive/tez/union3.q.out +++ 
b/ql/src/test/results/clientpositive/tez/union3.q.out @@ -46,12 +46,12 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Reducer 10 <- Map 9 (SIMPLE_EDGE) - Reducer 11 <- Reducer 10 (SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 5 <- Map 4 (SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 7 <- Map 6 (SIMPLE_EDGE) - Reducer 8 <- Reducer 7 (SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 11 <- Map 10 (SIMPLE_EDGE), Union 4 (CONTAINS) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Union 4 (CONTAINS) + Reducer 6 <- Map 5 (SIMPLE_EDGE) + Reducer 7 <- Reducer 6 (SIMPLE_EDGE), Union 4 (CONTAINS) + Reducer 9 <- Map 8 (SIMPLE_EDGE), Union 4 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 @@ -67,7 +67,7 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Map 4 + Map 10 Map Operator Tree: TableScan alias: src @@ -80,7 +80,7 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Map 6 + Map 5 Map Operator Tree: TableScan alias: src @@ -93,7 +93,7 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Map 9 + Map 8 Map Operator Tree: TableScan alias: src @@ -106,23 +106,37 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Reducer 10 + Reducer 11 Reduce Operator Tree: - Select Operator - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Limit - Number of rows: 1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Limit + Number of rows: 1 + Select Operator + expressions: 4 (type: int) + outputColumnNames: _col0 Select Operator - expressions: 2 (type: int) + expressions: _col0 (type: int) outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 2 + Reduce Operator Tree: + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator + expressions: 1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Reducer 11 + Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -136,57 +150,21 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 2 - Reduce Operator Tree: - Select Operator - Limit - Number of rows: 1 - Select Operator - expressions: 4 (type: int) - outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - File Output Operator - 
compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 + Reducer 6 Reduce Operator Tree: - Select Operator - Limit - Number of rows: 1 - Select Operator - expressions: 3 (type: int) - outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 - Reduce Operator Tree: - Select Operator + Limit + Number of rows: 1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Limit - Number of rows: 1 + Select Operator + expressions: 2 (type: int) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Reducer 8 + Reducer 7 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -200,8 +178,24 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 3 - Vertex: Union 3 + Reducer 9 + Reduce Operator Tree: + Limit + Number of rows: 1 + Select Operator + expressions: 3 (type: int) + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 4 + Vertex: Union 4 Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/tez/union6.q.out b/ql/src/test/results/clientpositive/tez/union6.q.out index 0e24b10..f976fcb 100644 --- a/ql/src/test/results/clientpositive/tez/union6.q.out +++ b/ql/src/test/results/clientpositive/tez/union6.q.out @@ -61,16 +61,13 @@ STAGE PLANS: Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.tmptable + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tmptable Reducer 2 Reduce Operator Tree: Group By Operator @@ -80,16 +77,13 @@ STAGE 
PLANS: Select Operator expressions: 'tst1' (type: string), UDFToString(_col0) (type: string) outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.tmptable + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tmptable Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/tez/vector_decimal_mapjoin.q.out b/ql/src/test/results/clientpositive/tez/vector_decimal_mapjoin.q.out index 049be26..3fb47f5 100644 --- a/ql/src/test/results/clientpositive/tez/vector_decimal_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/tez/vector_decimal_mapjoin.q.out @@ -38,26 +38,12 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) + Map 1 <- Map 2 (BROADCAST_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: r - Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (cint = 6981) (type: boolean) - Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: 6981 (type: int) - sort order: + - Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE - value expressions: cdecimal2 (type: decimal(23,14)) - Execution mode: vectorized - Map 2 - Map Operator Tree: - TableScan alias: l Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -74,7 +60,7 @@ STAGE PLANS: 1 6981 (type: int) outputColumnNames: _col1, _col9 input vertices: - 1 Map 1 + 1 Map 2 Statistics: Num rows: 6758 Data size: 1190783 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: 6981 (type: int), 6981 (type: int), _col1 (type: decimal(20,10)), _col9 (type: decimal(23,14)) @@ -88,6 +74,20 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map 2 + Map Operator Tree: + TableScan + alias: r + Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (cint = 6981) (type: boolean) + Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: 6981 (type: int) + sort order: + + Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE + value expressions: cdecimal2 (type: decimal(23,14)) + Execution mode: vectorized Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/tez/vector_groupby_reduce.q.out b/ql/src/test/results/clientpositive/tez/vector_groupby_reduce.q.out index d45fbde..768a343 100644 --- a/ql/src/test/results/clientpositive/tez/vector_groupby_reduce.q.out +++ b/ql/src/test/results/clientpositive/tez/vector_groupby_reduce.q.out @@ -387,30 +387,26 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE 
Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col0) - keys: _col0 (type: int) - mode: complete - outputColumnNames: _col0, _col1 + Group By Operator + aggregations: min(_col0) + keys: _col0 (type: int) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 20 + Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 20 Data size: 1760 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 20 Data size: 1760 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/tez/vector_left_outer_join.q.out b/ql/src/test/results/clientpositive/tez/vector_left_outer_join.q.out index 71367c4..8ebbe9881 100644 --- a/ql/src/test/results/clientpositive/tez/vector_left_outer_join.q.out +++ b/ql/src/test/results/clientpositive/tez/vector_left_outer_join.q.out @@ -24,24 +24,13 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: hd - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: ctinyint (type: tinyint) - sort order: + - Map-reduce partition columns: ctinyint (type: tinyint) - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map 2 - Map Operator Tree: - TableScan alias: c Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -55,7 +44,7 @@ STAGE PLANS: 1 cint (type: int) outputColumnNames: _col0 input vertices: - 1 Map 4 + 1 Map 3 Statistics: Num rows: 13516 Data size: 414960 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -67,21 +56,19 @@ STAGE PLANS: 0 _col0 (type: tinyint) 1 ctinyint (type: tinyint) input vertices: - 1 Map 1 + 1 Map 4 Statistics: Num rows: 14867 Data size: 456456 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 14867 Data size: 456456 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: 
_col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Execution mode: vectorized - Map 4 + Map 3 Map Operator Tree: TableScan alias: cd @@ -92,7 +79,18 @@ STAGE PLANS: Map-reduce partition columns: cint (type: int) Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Reducer 3 + Map 4 + Map Operator Tree: + TableScan + alias: hd + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: ctinyint (type: tinyint) + sort order: + + Map-reduce partition columns: ctinyint (type: tinyint) + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/tez/vector_mapjoin_reduce.q.out b/ql/src/test/results/clientpositive/tez/vector_mapjoin_reduce.q.out index 4c48287..7eeadb9 100644 --- a/ql/src/test/results/clientpositive/tez/vector_mapjoin_reduce.q.out +++ b/ql/src/test/results/clientpositive/tez/vector_mapjoin_reduce.q.out @@ -30,12 +30,30 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Reducer 3 <- Map 1 (BROADCAST_EDGE), Map 2 (SIMPLE_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: lineitem + Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: l_partkey is not null (type: boolean) + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: l_partkey (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + Map 3 + Map Operator Tree: + TableScan alias: li Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -47,28 +65,6 @@ STAGE PLANS: Map-reduce partition columns: l_partkey (type: int) Statistics: Num rows: 12 Data size: 1439 Basic stats: COMPLETE Column stats: NONE value expressions: l_orderkey (type: int), l_suppkey (type: int) - Map 2 - Map Operator Tree: - TableScan - alias: lineitem - Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: l_partkey is not null (type: boolean) - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: l_partkey (type: int) - outputColumnNames: l_partkey - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: l_partkey (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: 
+ - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Map 4 Map Operator Tree: TableScan @@ -91,54 +87,50 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE - Reducer 3 + Reducer 2 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 {l_orderkey} {l_suppkey} + keys: + 0 _col0 (type: int) + 1 l_partkey (type: int) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 1 Map 3 + Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 condition expressions: - 0 {_col0} - 1 {l_orderkey} {l_suppkey} + 0 {_col0} {_col3} + 1 keys: - 0 _col0 (type: int) - 1 l_partkey (type: int) - outputColumnNames: _col0, _col1, _col3 + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col3 input vertices: - 1 Map 1 - Statistics: Num rows: 27 Data size: 3298 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - condition expressions: - 0 {_col0} {_col3} - 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col3 - input vertices: - 1 Map 4 - Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col3 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 1 Map 4 + Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col3 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized Stage: Stage-0 @@ -193,12 +185,30 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Reducer 3 <- Map 1 (BROADCAST_EDGE), Map 2 (SIMPLE_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: lineitem + Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: l_partkey is not null (type: boolean) + 
+                    Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      keys: l_partkey (type: int)
+                      mode: hash
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
+        Map 3 
+            Map Operator Tree:
+                TableScan
                   alias: li
                   Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
@@ -210,28 +220,6 @@ STAGE PLANS:
                         Map-reduce partition columns: l_partkey (type: int)
                         Statistics: Num rows: 6 Data size: 719 Basic stats: COMPLETE Column stats: NONE
                         value expressions: l_orderkey (type: int), l_suppkey (type: int)
-        Map 2 
-            Map Operator Tree:
-                TableScan
-                  alias: lineitem
-                  Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: l_partkey is not null (type: boolean)
-                    Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: l_partkey (type: int)
-                      outputColumnNames: l_partkey
-                      Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
-                      Group By Operator
-                        keys: l_partkey (type: int)
-                        mode: hash
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: int)
-                          sort order: +
-                          Map-reduce partition columns: _col0 (type: int)
-                          Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
         Map 4 
             Map Operator Tree:
                 TableScan
@@ -254,54 +242,50 @@ STAGE PLANS:
                       sort order: ++
                      Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
                       Statistics: Num rows: 6 Data size: 719 Basic stats: COMPLETE Column stats: NONE
-        Reducer 3 
+        Reducer 2 
             Reduce Operator Tree:
               Group By Operator
                 keys: KEY._col0 (type: int)
                 mode: mergepartial
                 outputColumnNames: _col0
-                Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: int)
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
+                Map Join Operator
+                  condition map:
+                       Inner Join 0 to 1
+                  condition expressions:
+                    0 {_col0}
+                    1 {l_orderkey} {l_suppkey}
+                  keys:
+                    0 _col0 (type: int)
+                    1 l_partkey (type: int)
+                  outputColumnNames: _col0, _col1, _col3
+                  input vertices:
+                    1 Map 3
+                  Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE
                   Map Join Operator
                     condition map:
-                         Inner Join 0 to 1
+                         Left Semi Join 0 to 1
                     condition expressions:
-                      0 {_col0}
-                      1 {l_orderkey} {l_suppkey}
+                      0 {_col0} {_col3}
+                      1 
                     keys:
-                      0 _col0 (type: int)
-                      1 l_partkey (type: int)
-                    outputColumnNames: _col0, _col1, _col3
+                      0 _col1 (type: int), 1 (type: int)
+                      1 _col0 (type: int), _col1 (type: int)
+                    outputColumnNames: _col0, _col3
                     input vertices:
-                      1 Map 1
-                    Statistics: Num rows: 27 Data size: 3298 Basic stats: COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Left Semi Join 0 to 1
-                      condition expressions:
-                        0 {_col0} {_col3}
-                        1 
-                      keys:
-                        0 _col1 (type: int), 1 (type: int)
-                        1 _col0 (type: int), _col1 (type: int)
-                      outputColumnNames: _col0, _col3
-                      input vertices:
-                        1 Map 4
-                      Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE
-                      Select Operator
-                        expressions: _col0 (type: int), _col3 (type: int)
-                        outputColumnNames: _col0, _col1
-                        Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE
-                        File Output Operator
-                          compressed: false
-                          Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE
-                          table:
-                              input format: org.apache.hadoop.mapred.TextInputFormat
-                              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      1 Map 4
+                    Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: _col0 (type: int), _col3 (type: int)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized
 
   Stage: Stage-0
diff --git a/ql/src/test/results/clientpositive/tez/vector_orderby_5.q.out b/ql/src/test/results/clientpositive/tez/vector_orderby_5.q.out
index 8a4d25a..c968d72 100644
--- a/ql/src/test/results/clientpositive/tez/vector_orderby_5.q.out
+++ b/ql/src/test/results/clientpositive/tez/vector_orderby_5.q.out
@@ -148,26 +148,22 @@ STAGE PLANS:
                 keys: KEY._col0 (type: boolean)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: boolean), _col1 (type: bigint)
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: boolean)
-                    sort order: -
-                    Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col1 (type: bigint)
+                Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: boolean)
+                  sort order: -
+                  Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: bigint)
             Execution mode: vectorized
         Reducer 3 
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: boolean), VALUE._col0 (type: bigint)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.TextInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
diff --git a/ql/src/test/results/clientpositive/tez/vector_reduce_groupby_decimal.q.out b/ql/src/test/results/clientpositive/tez/vector_reduce_groupby_decimal.q.out
index d38faab..6c6c4a0 100644
--- a/ql/src/test/results/clientpositive/tez/vector_reduce_groupby_decimal.q.out
+++ b/ql/src/test/results/clientpositive/tez/vector_reduce_groupby_decimal.q.out
@@ -44,22 +44,18 @@ STAGE PLANS:
                   Filter Operator
                     predicate: (cdecimal1 is not null and cdecimal2 is not null) (type: boolean)
                     Statistics: Num rows: 1526 Data size: 360136 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: cint (type: int), cdouble (type: double), cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14))
-                      outputColumnNames: cint, cdouble, cdecimal1, cdecimal2
+                    Group By Operator
+                      aggregations: min(cdecimal1)
+                      keys: cint (type: int), cdouble (type: double), cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14))
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4
                       Statistics: Num rows: 1526 Data size: 360136 Basic stats: COMPLETE Column stats: NONE
-                      Group By Operator
-                        aggregations: min(cdecimal1)
-                        keys: cint (type: int), cdouble (type: double), cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14))
-                        mode: hash
-                        outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int), _col1 (type: double), _col2 (type: decimal(20,10)), _col3 (type: decimal(23,14))
+                        sort order: ++++
+                        Map-reduce partition columns: _col0 (type: int), _col1 (type: double), _col2 (type: decimal(20,10)), _col3 (type: decimal(23,14))
                         Statistics: Num rows: 1526 Data size: 360136 Basic stats: COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: int), _col1 (type: double), _col2 (type: decimal(20,10)), _col3 (type: decimal(23,14))
-                          sort order: ++++
-                          Map-reduce partition columns: _col0 (type: int), _col1 (type: double), _col2 (type: decimal(20,10)), _col3 (type: decimal(23,14))
-                          Statistics: Num rows: 1526 Data size: 360136 Basic stats: COMPLETE Column stats: NONE
-                          value expressions: _col4 (type: decimal(20,10))
+                        value expressions: _col4 (type: decimal(20,10))
             Execution mode: vectorized
         Reducer 2 
             Reduce Operator Tree:
@@ -68,23 +64,19 @@ STAGE PLANS:
                 keys: KEY._col0 (type: int), KEY._col1 (type: double), KEY._col2 (type: decimal(20,10)), KEY._col3 (type: decimal(23,14))
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                Statistics: Num rows: 763 Data size: 180068 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: int), _col1 (type: double), _col2 (type: decimal(20,10)), _col3 (type: decimal(23,14)), _col4 (type: decimal(20,10))
-                  outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                  Statistics: Num rows: 763 Data size: 180068 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: int), _col1 (type: double), _col2 (type: decimal(20,10)), _col3 (type: decimal(23,14))
-                    sort order: ++++
-                    Statistics: Num rows: 763 Data size: 180068 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col4 (type: decimal(20,10))
+                Statistics: Num rows: 1526 Data size: 360136 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int), _col1 (type: double), _col2 (type: decimal(20,10)), _col3 (type: decimal(23,14))
+                  sort order: ++++
+                  Statistics: Num rows: 1526 Data size: 360136 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col4 (type: decimal(20,10))
             Execution mode: vectorized
         Reducer 3 
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: decimal(20,10)), KEY.reducesinkkey3 (type: decimal(23,14)), VALUE._col0 (type: decimal(20,10))
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                Statistics: Num rows: 763 Data size: 180068 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1526 Data size: 360136 Basic stats: COMPLETE Column stats: NONE
                 Limit
                   Number of rows: 50
                   Statistics: Num rows: 50 Data size: 11800 Basic stats: COMPLETE Column stats: NONE
diff --git a/ql/src/test/results/clientpositive/tez/vectorization_0.q.out b/ql/src/test/results/clientpositive/tez/vectorization_0.q.out
index 7703158..f7228df 100644
--- a/ql/src/test/results/clientpositive/tez/vectorization_0.q.out
+++ b/ql/src/test/results/clientpositive/tez/vectorization_0.q.out
@@ -54,15 +54,11 @@ STAGE PLANS:
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3
                 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: tinyint), _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint)
-                  outputColumnNames: _col0, _col1, _col2, _col3
+                Reduce Output Operator
+                  key expressions: _col0 (type: tinyint)
+                  sort order: +
                   Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: tinyint)
-                    sort order: +
-                    Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint)
+                  value expressions: _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint)
             Execution mode: vectorized
         Reducer 3 
             Reduce Operator Tree:
@@ -152,14 +148,10 @@ STAGE PLANS:
                 mode: mergepartial
                 outputColumnNames: _col0
                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: bigint)
-                  outputColumnNames: _col0
+                Reduce Output Operator
+                  key expressions: _col0 (type: bigint)
+                  sort order: +
                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: bigint)
-                    sort order: +
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized
         Reducer 3 
             Reduce Operator Tree:
@@ -259,15 +251,11 @@ STAGE PLANS:
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
                 Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double)
-                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+                Reduce Output Operator
+                  key expressions: _col0 (type: double)
+                  sort order: +
                   Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: double)
-                    sort order: +
-                    Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double)
+                  value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double)
         Reducer 3 
             Reduce Operator Tree:
               Select Operator
@@ -372,15 +360,11 @@ STAGE PLANS:
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3
                 Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint)
-                  outputColumnNames: _col0, _col1, _col2, _col3
+                Reduce Output Operator
+                  key expressions: _col0 (type: bigint)
+                  sort order: +
                   Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: bigint)
-                    sort order: +
-                    Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint)
+                  value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint)
             Execution mode: vectorized
         Reducer 3 
            Reduce Operator Tree:
@@ -470,14 +454,10 @@ STAGE PLANS:
                 mode: mergepartial
                 outputColumnNames: _col0
                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: bigint)
-                  outputColumnNames: _col0
+                Reduce Output Operator
+                  key expressions: _col0 (type: bigint)
+                  sort order: +
                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: bigint)
-                    sort order: +
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized
         Reducer 3 
             Reduce Operator Tree:
@@ -577,15 +557,11 @@ STAGE PLANS:
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
                 Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double)
-                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+                Reduce Output Operator
+                  key expressions: _col0 (type: double)
+                  sort order: +
                   Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: double)
-                    sort order: +
-                    Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double)
+                  value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double)
         Reducer 3 
             Reduce Operator Tree:
               Select Operator
@@ -690,15 +666,11 @@ STAGE PLANS:
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3
                 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: float), _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint)
-                  outputColumnNames: _col0, _col1, _col2, _col3
+                Reduce Output Operator
+                  key expressions: _col0 (type: float)
+                  sort order: +
                   Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: float)
-                    sort order: +
-                    Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint)
+                  value expressions: _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint)
             Execution mode: vectorized
         Reducer 3 
             Reduce Operator Tree:
@@ -788,14 +760,10 @@ STAGE PLANS:
                 mode: mergepartial
                 outputColumnNames: _col0
                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: double)
-                  outputColumnNames: _col0
+                Reduce Output Operator
+                  key expressions: _col0 (type: double)
+                  sort order: +
                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: double)
-                    sort order: +
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized
         Reducer 3 
             Reduce Operator Tree:
@@ -895,15 +863,11 @@ STAGE PLANS:
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
                 Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double)
-                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+                Reduce Output Operator
+                  key expressions: _col0 (type: double)
+                  sort order: +
                   Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: double)
-                    sort order: +
-                    Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double)
+                  value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double)
         Reducer 3 
             Reduce Operator Tree:
              Select Operator
diff --git a/ql/src/test/results/clientpositive/tez/vectorization_limit.q.out b/ql/src/test/results/clientpositive/tez/vectorization_limit.q.out
index cd778e2..8e0d7a1 100644
--- a/ql/src/test/results/clientpositive/tez/vectorization_limit.q.out
+++ b/ql/src/test/results/clientpositive/tez/vectorization_limit.q.out
@@ -500,23 +500,19 @@ STAGE PLANS:
                 keys: KEY._col0 (type: double)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 3072 Data size: 94309 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: double), _col1 (type: bigint)
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 3072 Data size: 94309 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col1 (type: bigint), _col0 (type: double)
-                    sort order: ++
-                    Statistics: Num rows: 3072 Data size: 94309 Basic stats: COMPLETE Column stats: NONE
-                    TopN Hash Memory Usage: 0.3
+                Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col1 (type: bigint), _col0 (type: double)
+                  sort order: ++
+                  Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE
+                  TopN Hash Memory Usage: 0.3
             Execution mode: vectorized
         Reducer 3 
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey1 (type: double), KEY.reducesinkkey0 (type: bigint)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 3072 Data size: 94309 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE
                 Limit
                   Number of rows: 20
                   Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE
diff --git a/ql/src/test/results/clientpositive/tez/vectorized_bucketmapjoin1.q.out b/ql/src/test/results/clientpositive/tez/vectorized_bucketmapjoin1.q.out
index e00b7bb..7e3a9b2 100644
--- a/ql/src/test/results/clientpositive/tez/vectorized_bucketmapjoin1.q.out
+++ b/ql/src/test/results/clientpositive/tez/vectorized_bucketmapjoin1.q.out
@@ -114,7 +114,7 @@ STAGE PLANS:
         Map 1 
             Map Operator Tree:
                 TableScan
-                  alias: b
+                  alias: a
                   Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: key is not null (type: boolean)
@@ -129,7 +129,7 @@ STAGE PLANS:
         Map 3 
             Map Operator Tree:
                 TableScan
-                  alias: a
+                  alias: b
                   Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: key is not null (type: boolean)
@@ -203,32 +203,32 @@ STAGE PLANS:
         Map 1 
             Map Operator Tree:
                 TableScan
-                  alias: b
-                  Statistics: Num rows: 2 Data size: 50 Basic stats: COMPLETE Column stats: NONE
+                  alias: a
+                  Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 1 Data size: 25 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
                       key expressions: key (type: int)
                       sort order: +
                       Map-reduce partition columns: key (type: int)
-                      Statistics: Num rows: 1 Data size: 25 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE
                       value expressions: value (type: string)
+            Execution mode: vectorized
         Map 3 
             Map Operator Tree:
                 TableScan
-                  alias: a
-                  Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE
+                  alias: b
+                  Statistics: Num rows: 2 Data size: 50 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 25 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
                       key expressions: key (type: int)
                       sort order: +
                       Map-reduce partition columns: key (type: int)
-                      Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 25 Basic stats: COMPLETE Column stats: NONE
                       value expressions: value (type: string)
-            Execution mode: vectorized
         Reducer 2 
             Reduce Operator Tree:
               Merge Join Operator
@@ -301,32 +301,32 @@ STAGE PLANS:
         Map 1 
             Map Operator Tree:
                 TableScan
-                  alias: b
-                  Statistics: Num rows: 2 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+                  alias: a
+                  Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
                       key expressions: key (type: int)
                       sort order: +
                       Map-reduce partition columns: key (type: int)
-                      Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE
                       value expressions: value (type: string)
+            Execution mode: vectorized
         Map 3 
             Map Operator Tree:
                 TableScan
-                  alias: a
-                  Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE
+                  alias: b
+                  Statistics: Num rows: 2 Data size: 52 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
                       key expressions: key (type: int)
                       sort order: +
                       Map-reduce partition columns: key (type: int)
-                      Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE
                       value expressions: value (type: string)
-            Execution mode: vectorized
         Reducer 2 
             Reduce Operator Tree:
               Merge Join Operator
diff --git a/ql/src/test/results/clientpositive/tez/vectorized_context.q.out b/ql/src/test/results/clientpositive/tez/vectorized_context.q.out
index c7f0562..6ebd11a 100644
--- a/ql/src/test/results/clientpositive/tez/vectorized_context.q.out
+++ b/ql/src/test/results/clientpositive/tez/vectorized_context.q.out
@@ -110,16 +110,17 @@ STAGE PLANS:
         Map 1 
             Map Operator Tree:
                 TableScan
-                  alias: household_demographics
-                  Statistics: Num rows: 6075 Data size: 24300 Basic stats: COMPLETE Column stats: NONE
+                  alias: store_sales
+                  Statistics: Num rows: 6075 Data size: 72736 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: hd_demo_sk is not null (type: boolean)
-                    Statistics: Num rows: 3038 Data size: 12152 Basic stats: COMPLETE Column stats: NONE
+                    predicate: (ss_store_sk is not null and ss_hdemo_sk is not null) (type: boolean)
+                    Statistics: Num rows: 1519 Data size: 18186 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
-                      key expressions: hd_demo_sk (type: int)
+                      key expressions: ss_store_sk (type: int)
                       sort order: +
-                      Map-reduce partition columns: hd_demo_sk (type: int)
-                      Statistics: Num rows: 3038 Data size: 12152 Basic stats: COMPLETE Column stats: NONE
+                      Map-reduce partition columns: ss_store_sk (type: int)
+                      Statistics: Num rows: 1519 Data size: 18186 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: ss_hdemo_sk (type: int), ss_net_profit (type: double)
             Execution mode: vectorized
         Map 2 
             Map Operator Tree:
@@ -140,7 +141,7 @@ STAGE PLANS:
                         1 s_store_sk (type: int)
                       outputColumnNames: _col1, _col2, _col7
                       input vertices:
-                        0 Map 3
+                        0 Map 1
                       Statistics: Num rows: 3341 Data size: 338652 Basic stats: COMPLETE Column stats: NONE
                       Map Join Operator
                         condition map:
@@ -153,7 +154,7 @@ STAGE PLANS:
                           1 hd_demo_sk (type: int)
                         outputColumnNames: _col2, _col7
                         input vertices:
-                          1 Map 1
+                          1 Map 3
                         Statistics: Num rows: 3675 Data size: 372517 Basic stats: COMPLETE Column stats: NONE
                         Select Operator
                           expressions: _col7 (type: string), _col2 (type: double)
@@ -173,17 +174,16 @@ STAGE PLANS:
         Map 3 
             Map Operator Tree:
                 TableScan
-                  alias: store_sales
-                  Statistics: Num rows: 6075 Data size: 72736 Basic stats: COMPLETE Column stats: NONE
+                  alias: household_demographics
+                  Statistics: Num rows: 6075 Data size: 24300 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (ss_store_sk is not null and ss_hdemo_sk is not null) (type: boolean)
-                    Statistics: Num rows: 1519 Data size: 18186 Basic stats: COMPLETE Column stats: NONE
+                    predicate: hd_demo_sk is not null (type: boolean)
+                    Statistics: Num rows: 3038 Data size: 12152 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
-                      key expressions: ss_store_sk (type: int)
+                      key expressions: hd_demo_sk (type: int)
                       sort order: +
-                      Map-reduce partition columns: ss_store_sk (type: int)
-                      Statistics: Num rows: 1519 Data size: 18186 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: ss_hdemo_sk (type: int), ss_net_profit (type: double)
+                      Map-reduce partition columns: hd_demo_sk (type: int)
+                      Statistics: Num rows: 3038 Data size: 12152 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized
 
   Stage: Stage-0
diff --git a/ql/src/test/results/clientpositive/tez/vectorized_date_funcs.q.out b/ql/src/test/results/clientpositive/tez/vectorized_date_funcs.q.out
index 8f08aca..27b678a 100644
--- a/ql/src/test/results/clientpositive/tez/vectorized_date_funcs.q.out
+++ b/ql/src/test/results/clientpositive/tez/vectorized_date_funcs.q.out
@@ -965,15 +965,11 @@ STAGE PLANS:
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3
                 Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: date), _col1 (type: date), _col2 (type: bigint), _col3 (type: bigint)
-                  outputColumnNames: _col0, _col1, _col2, _col3
+                Reduce Output Operator
+                  key expressions: _col0 (type: date)
+                  sort order: +
                   Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: date)
-                    sort order: +
-                    Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col1 (type: date), _col2 (type: bigint), _col3 (type: bigint)
+                  value expressions: _col1 (type: date), _col2 (type: bigint), _col3 (type: bigint)
             Execution mode: vectorized
         Reducer 3 
             Reduce Operator Tree:
diff --git a/ql/src/test/results/clientpositive/tez/vectorized_dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/tez/vectorized_dynamic_partition_pruning.q.out
index 5cf41f0..a21108b 100644
--- a/ql/src/test/results/clientpositive/tez/vectorized_dynamic_partition_pruning.q.out
+++ b/ql/src/test/results/clientpositive/tez/vectorized_dynamic_partition_pruning.q.out
@@ -253,17 +253,15 @@ STAGE PLANS:
                   0 
                   1 
                 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
                     Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      sort order: 
-                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col0 (type: bigint)
+                    value expressions: _col0 (type: bigint)
         Reducer 3 
             Reduce Operator Tree:
               Group By Operator
@@ -360,17 +358,15 @@ STAGE PLANS:
                   0 
                   1 
                 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
                     Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      sort order: 
-                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col0 (type: bigint)
+                    value expressions: _col0 (type: bigint)
         Reducer 3 
             Reduce Operator Tree:
               Group By Operator
@@ -445,8 +441,8 @@ STAGE PLANS:
   Stage: Stage-1
     Tez
       Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE)
-        Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
+        Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
         Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
@@ -464,19 +460,19 @@ STAGE PLANS:
         Map 5 
             Map Operator Tree:
                 TableScan
-                  alias: srcpart_hour
-                  filterExpr: (hr is not null and (hour = 11)) (type: boolean)
-                  Statistics: Num rows: 2 Data size: 344 Basic stats: COMPLETE Column stats: NONE
+                  alias: srcpart_date
+                  filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean)
+                  Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (hr is not null and (hour = 11)) (type: boolean)
+                    predicate: (ds is not null and (date = '2008-04-08')) (type: boolean)
                     Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                     Reduce Output Operator
-                      key expressions: hr (type: string)
+                      key expressions: ds (type: string)
                       sort order: +
-                      Map-reduce partition columns: hr (type: string)
+                      Map-reduce partition columns: ds (type: string)
                       Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                     Select Operator
-                      expressions: hr (type: string)
+                      expressions: ds (type: string)
                       outputColumnNames: _col0
                       Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                       Group By Operator
@@ -486,26 +482,27 @@ STAGE PLANS:
                         Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                         Dynamic Partitioning Event Operator
                           Target Input: srcpart
-                          Partition key expr: hr
+                          Partition key expr: ds
                           Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                          Target column: hr
+                          Target column: ds
                           Target Vertex: Map 1
+            Execution mode: vectorized
         Map 6 
             Map Operator Tree:
                 TableScan
-                  alias: srcpart_date
-                  filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean)
-                  Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
+                  alias: srcpart_hour
+                  filterExpr: (hr is not null and (hour = 11)) (type: boolean)
+                  Statistics: Num rows: 2 Data size: 344 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (ds is not null and (date = '2008-04-08')) (type: boolean)
+                    predicate: (hr is not null and (hour = 11)) (type: boolean)
                     Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                     Reduce Output Operator
-                      key expressions: ds (type: string)
+                      key expressions: hr (type: string)
                       sort order: +
-                      Map-reduce partition columns: ds (type: string)
+                      Map-reduce partition columns: hr (type: string)
                       Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                     Select Operator
-                      expressions: ds (type: string)
+                      expressions: hr (type: string)
                       outputColumnNames: _col0
                       Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                       Group By Operator
@@ -515,11 +512,10 @@ STAGE PLANS:
                         Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                         Dynamic Partitioning Event Operator
                           Target Input: srcpart
-                          Partition key expr: ds
+                          Partition key expr: hr
                           Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                          Target column: ds
+                          Target column: hr
                           Target Vertex: Map 1
-            Execution mode: vectorized
         Reducer 2 
             Reduce Operator Tree:
               Merge Join Operator
@@ -544,17 +540,15 @@ STAGE PLANS:
                   0 
                   1 
                 Statistics: Num rows: 2420 Data size: 25709 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  Statistics: Num rows: 2420 Data size: 25709 Basic stats: COMPLETE Column stats: NONE
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
                     Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      sort order: 
-                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col0 (type: bigint)
+                    value expressions: _col0 (type: bigint)
         Reducer 4 
             Reduce Operator Tree:
               Group By Operator
@@ -618,8 +612,8 @@ STAGE PLANS:
   Stage: Stage-1
     Tez
      Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE)
-        Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
+        Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
         Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
@@ -638,32 +632,32 @@ STAGE PLANS:
         Map 5 
             Map Operator Tree:
                 TableScan
-                  alias: srcpart_hour
-                  filterExpr: (hr is not null and (hour = 11)) (type: boolean)
-                  Statistics: Num rows: 2 Data size: 344 Basic stats: COMPLETE Column stats: NONE
+                  alias: srcpart_date
+                  filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean)
+                  Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (hr is not null and (hour = 11)) (type: boolean)
+                    predicate: (ds is not null and (date = '2008-04-08')) (type: boolean)
                     Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                     Reduce Output Operator
-                      key expressions: hr (type: string)
+                      key expressions: ds (type: string)
                       sort order: +
-                      Map-reduce partition columns: hr (type: string)
+                      Map-reduce partition columns: ds (type: string)
                       Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+            Execution mode: vectorized
         Map 6 
             Map Operator Tree:
                 TableScan
-                  alias: srcpart_date
-                  filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean)
-                  Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
+                  alias: srcpart_hour
+                  filterExpr: (hr is not null and (hour = 11)) (type: boolean)
+                  Statistics: Num rows: 2 Data size: 344 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (ds is not null and (date = '2008-04-08')) (type: boolean)
+                    predicate: (hr is not null and (hour = 11)) (type: boolean)
                     Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                     Reduce Output Operator
-                      key expressions: ds (type: string)
+                      key expressions: hr (type: string)
                       sort order: +
-                      Map-reduce partition columns: ds (type: string)
+                      Map-reduce partition columns: hr (type: string)
                       Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-            Execution mode: vectorized
         Reducer 2 
             Reduce Operator Tree:
               Merge Join Operator
@@ -688,17 +682,15 @@ STAGE PLANS:
                   0 
                   1 
                 Statistics: Num rows: 2420 Data size: 25709 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  Statistics: Num rows: 2420 Data size: 25709 Basic stats: COMPLETE Column stats: NONE
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
                     Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      sort order: 
-                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col0 (type: bigint)
+                    value expressions: _col0 (type: bigint)
         Reducer 4 
             Reduce Operator Tree:
               Group By Operator
@@ -840,17 +832,15 @@ STAGE PLANS:
                   0 
                   1 
                 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
                     Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      sort order: 
-                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col0 (type: bigint)
+                    value expressions: _col0 (type: bigint)
         Reducer 3 
             Reduce Operator Tree:
               Group By Operator
@@ -946,17 +936,15 @@ STAGE PLANS:
                   0 
                   1 
                 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
                     Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      sort order: 
-                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col0 (type: bigint)
+                    value expressions: _col0 (type: bigint)
         Reducer 3 
             Reduce Operator Tree:
               Group By Operator
@@ -1081,17 +1069,15 @@ STAGE PLANS:
                   0 
                   1 
                 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
                     Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      sort order: 
-                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col0 (type: bigint)
+                    value expressions: _col0 (type: bigint)
         Reducer 3 
             Reduce Operator Tree:
               Group By Operator
@@ -1188,17 +1174,15 @@ STAGE PLANS:
                   0 
                   1 
                 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
                     Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      sort order: 
-                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col0 (type: bigint)
+                    value expressions: _col0 (type: bigint)
         Reducer 3 
             Reduce Operator Tree:
              Group By Operator
@@ -1323,17 +1307,15 @@ STAGE PLANS:
                   0 
                   1 
                 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
                     Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      sort order: 
-                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col0 (type: bigint)
+                    value expressions: _col0 (type: bigint)
         Reducer 3 
            Reduce Operator Tree:
              Group By Operator
@@ -1447,17 +1429,15 @@ STAGE PLANS:
                   0 
                   1 
                 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
                     Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      sort order: 
-                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col0 (type: bigint)
+                    value expressions: _col0 (type: bigint)
         Reducer 3 
            Reduce Operator Tree:
              Group By Operator
@@ -1556,17 +1536,15 @@ STAGE PLANS:
                   0 
                   1 
                 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
                     Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      sort order: 
-                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col0 (type: bigint)
+                    value expressions: _col0 (type: bigint)
         Reducer 3 
            Reduce Operator Tree:
              Group By Operator
@@ -1665,17 +1643,15 @@ STAGE PLANS:
                   0 
                   1 
                 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
                     Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      sort order: 
-                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col0 (type: bigint)
+                    value expressions: _col0 (type: bigint)
         Reducer 3 
            Reduce Operator Tree:
              Group By Operator
@@ -1802,17 +1778,15 @@ STAGE PLANS:
                   0 
                   1 
                 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
                     Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      sort order: 
-                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col0 (type: bigint)
+                    value expressions: _col0 (type: bigint)
         Reducer 3 
            Reduce Operator Tree:
              Group By Operator
@@ -1885,15 +1859,35 @@ STAGE PLANS:
   Stage: Stage-1
     Tez
      Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
-        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
-        Reducer 5 <- Map 4 (SIMPLE_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+        Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
             Map Operator Tree:
                 TableScan
                   alias: srcpart
+                  filterExpr: (ds = '2008-04-08') (type: boolean)
+                  Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: '2008-04-08' (type: string)
+                    outputColumnNames: ds
+                    Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      keys: ds (type: string)
+                      mode: hash
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: srcpart
                   filterExpr: ds is not null (type: boolean)
                   Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
@@ -1915,29 +1909,22 @@ STAGE PLANS:
                           Partition key expr: ds
                           Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
                           Target column: ds
-                          Target Vertex: Map 4
-        Map 4 
-            Map Operator Tree:
-                TableScan
-                  alias: srcpart
-                  filterExpr: (ds = '2008-04-08') (type: boolean)
-                  Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: '2008-04-08' (type: string)
-                    outputColumnNames: ds
-                    Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
-                    Group By Operator
-                      keys: ds (type: string)
-                      mode: hash
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: string)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+                          Target Vertex: Map 1
         Reducer 2 
             Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized
+        Reducer 3 
+            Reduce Operator Tree:
               Merge Join Operator
                 condition map:
                      Inner Join 0 to 1
@@ -1945,18 +1932,16 @@ STAGE PLANS:
                   0 
                   1 
                 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
                     Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      sort order: 
-                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col0 (type: bigint)
-        Reducer 3 
+                    value expressions: _col0 (type: bigint)
+        Reducer 4 
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
@@ -1975,23 +1960,6 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized
-        Reducer 5 
-            Reduce Operator Tree:
-              Group By Operator
-                keys: KEY._col0 (type: string)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: string)
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: string)
-                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: vectorized
 
   Stage: Stage-0
     Fetch Operator
@@ -2390,16 +2358,6 @@ STAGE PLANS:
         Map 1 
             Map Operator Tree:
                 TableScan
-                  alias: srcpart
-                  Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: ds (type: string)
-                    sort order: +
-                    Map-reduce partition columns: ds (type: string)
-                    Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
-        Map 4 
-            Map Operator Tree:
-                TableScan
                   alias: srcpart_date
                   filterExpr: (date = '2008-04-08') (type: boolean)
                   Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
@@ -2425,8 +2383,18 @@ STAGE PLANS:
                           Partition key expr: ds
                           Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
                           Target column: ds
-                          Target Vertex: Map 1
+                          Target Vertex: Map 4
             Execution mode: vectorized
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: srcpart
+                  Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: ds (type: string)
+                    sort order: +
+                    Map-reduce partition columns: ds (type: string)
+                    Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
         Reducer 2 
             Reduce Operator Tree:
               Merge Join Operator
@@ -2436,17 +2404,15 @@ STAGE PLANS:
                   0 
                   1 
                 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
                     Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      sort order: 
-                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col0 (type: bigint)
+                    value expressions: _col0 (type: bigint)
         Reducer 3 
            Reduce Operator Tree:
              Group By Operator
@@ -2579,8 +2545,8 @@ STAGE PLANS:
   Stage: Stage-1
     Tez
      Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE)
-        Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
+        Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
         Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
@@ -2598,18 +2564,19 @@ STAGE PLANS:
         Map 5 
             Map Operator Tree:
                 TableScan
-                  alias: srcpart_hour
-                  filterExpr: ((hr is not null and (hour = 11)) and (hr = 11)) (type: boolean)
-                  Statistics: Num rows: 2 Data size: 344 Basic stats: COMPLETE Column stats: NONE
+                  alias: srcpart_date
+                  filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean)
+                  Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: ((hr is not null and (hour = 11)) and (hr = 11)) (type: boolean)
+                    predicate: (ds is not null and (date = '2008-04-08')) (type: boolean)
                     Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                     Reduce Output Operator
-                      key expressions: '11' (type: string)
+                      key expressions: ds (type: string)
                       sort order: +
+                      Map-reduce partition columns: ds (type: string)
                       Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                     Select Operator
-                      expressions: '11' (type: string)
+                      expressions: ds (type: string)
                       outputColumnNames: _col0
                       Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                       Group By Operator
@@ -2619,26 +2586,26 @@ STAGE PLANS:
                         Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                         Dynamic Partitioning Event Operator
                           Target Input: srcpart
-                          Partition key expr: hr
+                          Partition key expr: ds
                           Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                          Target column: hr
+                          Target column: ds
                           Target Vertex: Map 1
+            Execution mode: vectorized
         Map 6 
             Map Operator Tree:
                 TableScan
-                  alias: srcpart_date
-                  filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean)
-                  Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
+                  alias: srcpart_hour
+                  filterExpr: ((hr is not null and (hour = 11)) and (hr = 11)) (type: boolean)
+                  Statistics: Num rows: 2 Data size: 344 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (ds is not null and (date = '2008-04-08')) (type: boolean)
+                    predicate: ((hr is not null and (hour = 11)) and (hr = 11)) (type: boolean)
                     Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                     Reduce Output Operator
-                      key expressions: ds (type: string)
+                      key expressions: '11' (type: string)
                       sort order: +
-                      Map-reduce partition columns: ds (type: string)
                       Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                     Select Operator
-                      expressions: ds (type: string)
+                      expressions: '11' (type: string)
                       outputColumnNames: _col0
                       Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                       Group By Operator
@@ -2648,11 +2615,10 @@ STAGE PLANS:
                         Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                         Dynamic Partitioning Event Operator
                           Target Input: srcpart
-                          Partition key expr: ds
+                          Partition key expr: hr
                           Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                          Target column: ds
+                          Target column: hr
                           Target Vertex: Map 1
-            Execution mode: vectorized
         Reducer 2 
            Reduce Operator Tree:
              Merge Join Operator
@@ -2675,17 +2641,15 @@ STAGE PLANS:
                   0 
                   1 
                 Statistics: Num rows: 1210 Data size: 12854 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  Statistics: Num rows: 1210 Data size: 12854 Basic stats: COMPLETE Column stats: NONE
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
                     Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      sort order: 
-                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col0 (type: bigint)
+                    value expressions: _col0 (type: bigint)
         Reducer 4 
            Reduce Operator Tree:
              Group By Operator
@@ -2745,8 +2709,8 @@ STAGE PLANS:
   Stage: Stage-1
     Tez
      Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE)
-        Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
+        Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
         Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
@@ -2754,19 +2718,6 @@ STAGE PLANS:
         Map 5 
             Map Operator Tree:
                 TableScan
-                  alias: srcpart_hour
-                  filterExpr: (hr = 13) (type: boolean)
-                  Statistics: Num rows: 2 Data size: 344 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: (hr = 13) (type: boolean)
-                    Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: '13' (type: string)
-                      sort order: +
-                      Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE
-        Map 6 
-            Map Operator Tree:
-                TableScan
                   alias: srcpart_date
                   filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean)
                   Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
@@ -2779,6 +2730,19 @@ STAGE PLANS:
                       Map-reduce partition columns: ds (type: string)
                       Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
             Execution mode: vectorized
+        Map 6 
+            Map Operator Tree:
+                TableScan
+                  alias: srcpart_hour
+                  filterExpr: (hr = 13) (type: boolean)
+                  Statistics: Num rows: 2 Data size: 344 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (hr = 13) (type: boolean)
+                    Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: '13' (type: string)
+                      sort order: +
+                      Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE
         Reducer 2 
            Reduce Operator Tree:
              Merge Join Operator
@@ -2801,17 +2765,15 @@ STAGE PLANS:
                   0 
                   1 
                 Statistics: Num rows: 1 Data size: 189 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  Statistics: Num rows: 1 Data size: 189 Basic stats: COMPLETE Column stats: NONE
-                  Group By Operator
-                    aggregations: count()
-                    mode: hash
-                    outputColumnNames: _col0
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
                     Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      sort order: 
-                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col0 (type: bigint)
+                    value expressions: _col0 (type: bigint)
         Reducer 4 
            Reduce Operator Tree:
              Group By Operator
@@ -2867,23 +2829,34 @@ STAGE PLANS:
   Stage: Stage-1
     Tez
      Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS)
(SIMPLE_EDGE), Union 3 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (SIMPLE_EDGE) - Reducer 8 <- Map 7 (SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 6 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE), Union 6 (CONTAINS) + Reducer 8 <- Map 7 (SIMPLE_EDGE), Union 6 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: srcpart + filterExpr: ds is not null (type: boolean) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: ds (type: string) + sort order: + + Map-reduce partition columns: ds (type: string) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: srcpart Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ds (type: string) outputColumnNames: ds Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: min(ds) + aggregations: max(ds) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE @@ -2891,17 +2864,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Map 6 - Map Operator Tree: - TableScan - alias: srcpart - filterExpr: ds is not null (type: boolean) - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: ds (type: string) - sort order: + - Map-reduce partition columns: ds (type: string) - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Map 7 Map Operator Tree: TableScan @@ -2912,7 +2874,7 @@ STAGE PLANS: outputColumnNames: ds Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: max(ds) + aggregations: min(ds) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE @@ -2922,40 +2884,6 @@ STAGE PLANS: value expressions: _col0 (type: string) Reducer 2 Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Filter Operator - predicate: _col0 is not null (type: boolean) - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Dynamic Partitioning Event Operator - Target Input: srcpart - Partition key expr: ds - Target column: ds - Target Vertex: Map 6 - Reducer 4 - Reduce Operator Tree: Merge Join Operator condition map: Left Semi Join 0 to 1 @@ -2963,18 +2891,16 @@ STAGE PLANS: 0 1 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: 
_col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 5 + value expressions: _col0 (type: bigint) + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -2993,7 +2919,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized - Reducer 8 + Reducer 5 Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0) @@ -3001,9 +2927,14 @@ STAGE PLANS: outputColumnNames: _col0 Filter Operator predicate: _col0 is not null (type: boolean) - Select Operator - expressions: _col0 (type: string) + Group By Operator + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 @@ -3011,39 +2942,56 @@ STAGE PLANS: keys: _col0 (type: string) mode: hash outputColumnNames: _col0 - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Dynamic Partitioning Event Operator - Target Input: srcpart - Partition key expr: ds - Target column: ds - Target Vertex: Map 6 - Union 3 - Vertex: Union 3 - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select count(*) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart) -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 -#### A masked pattern was here #### + Dynamic Partitioning Event Operator + Target Input: srcpart + Partition key expr: ds + Target column: ds + Target Vertex: Map 1 + Reducer 8 + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Filter Operator + predicate: _col0 is not null (type: boolean) + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Dynamic Partitioning Event Operator + Target Input: srcpart + Partition key expr: ds + Target column: ds + Target Vertex: Map 1 + Union 6 + Vertex: Union 6 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart) 
+PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### POSTHOOK: query: select count(*) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart) POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart @@ -3065,23 +3013,34 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 4 <- Map 6 (SIMPLE_EDGE), Union 3 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (SIMPLE_EDGE) - Reducer 8 <- Map 7 (SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 6 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE), Union 6 (CONTAINS) + Reducer 8 <- Map 7 (SIMPLE_EDGE), Union 6 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: srcpart + filterExpr: ds is not null (type: boolean) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: ds (type: string) + sort order: + + Map-reduce partition columns: ds (type: string) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: srcpart Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ds (type: string) outputColumnNames: ds Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: min(ds) + aggregations: max(ds) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE @@ -3089,17 +3048,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Map 6 - Map Operator Tree: - TableScan - alias: srcpart - filterExpr: ds is not null (type: boolean) - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: ds (type: string) - sort order: + - Map-reduce partition columns: ds (type: string) - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Map 7 Map Operator Tree: TableScan @@ -3110,7 +3058,7 @@ STAGE PLANS: outputColumnNames: ds Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: max(ds) + aggregations: min(ds) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE @@ -3120,40 +3068,6 @@ STAGE PLANS: value expressions: _col0 (type: string) Reducer 2 Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Filter Operator - predicate: _col0 is not null (type: boolean) - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Select Operator - expressions: _col0 (type: string) - 
outputColumnNames: _col0 - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Dynamic Partitioning Event Operator - Target Input: srcpart - Partition key expr: ds - Target column: ds - Target Vertex: Map 6 - Reducer 4 - Reduce Operator Tree: Merge Join Operator condition map: Left Semi Join 0 to 1 @@ -3162,21 +3076,17 @@ STAGE PLANS: 1 outputColumnNames: _col2 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string) - outputColumnNames: _col2 + Group By Operator + keys: _col2 (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col2 (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Reducer 5 + Reducer 3 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) @@ -3195,7 +3105,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized - Reducer 8 + Reducer 5 Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0) @@ -3203,9 +3113,42 @@ STAGE PLANS: outputColumnNames: _col0 Filter Operator predicate: _col0 is not null (type: boolean) - Select Operator - expressions: _col0 (type: string) + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Dynamic Partitioning Event Operator + Target Input: srcpart + Partition key expr: ds + Target column: ds + Target Vertex: Map 1 + Reducer 8 + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Filter Operator + predicate: _col0 is not null (type: boolean) + Group By Operator + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 @@ -3213,24 +3156,13 @@ STAGE PLANS: keys: _col0 (type: string) mode: hash outputColumnNames: _col0 - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Dynamic Partitioning Event Operator - Target Input: srcpart - Partition key expr: ds - Target column: ds - Target Vertex: Map 6 - Union 3 - Vertex: Union 3 + Dynamic Partitioning Event Operator + Target Input: srcpart + Partition key expr: ds + Target column: ds + Target Vertex: Map 1 + Union 6 + 
Vertex: Union 6 Stage: Stage-0 Fetch Operator @@ -3268,31 +3200,29 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Reducer 11 <- Map 10 (SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 11 <- Map 10 (SIMPLE_EDGE), Union 9 (CONTAINS) Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 4 <- Union 3 (SIMPLE_EDGE), Union 7 (SIMPLE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE), Union 7 (CONTAINS) - Reducer 9 <- Map 8 (SIMPLE_EDGE), Union 7 (CONTAINS) + Reducer 4 <- Union 3 (SIMPLE_EDGE), Union 9 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 8 <- Map 7 (SIMPLE_EDGE), Union 9 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: srcpart + filterExpr: ds is not null (type: boolean) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ds (type: string) - outputColumnNames: ds + Group By Operator + keys: ds (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(ds) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Map 10 Map Operator Tree: TableScan @@ -3303,7 +3233,7 @@ STAGE PLANS: outputColumnNames: ds Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: max(ds) + aggregations: min(ds) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE @@ -3317,51 +3247,50 @@ STAGE PLANS: alias: srcpart filterExpr: ds is not null (type: boolean) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ds (type: string) - outputColumnNames: ds + Group By Operator + keys: ds (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: ds (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Map 8 + Map 7 Map Operator Tree: TableScan alias: srcpart - filterExpr: ds is not null (type: boolean) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ds (type: string) outputColumnNames: ds Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: ds (type: string) + aggregations: max(ds) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column 
stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + sort order: + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Reducer 11 Reduce Operator Tree: Group By Operator - aggregations: max(VALUE._col0) + aggregations: min(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Filter Operator predicate: _col0 is not null (type: boolean) - Select Operator - expressions: _col0 (type: string) + Group By Operator + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 @@ -3369,45 +3298,11 @@ STAGE PLANS: keys: _col0 (type: string) mode: hash outputColumnNames: _col0 - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Dynamic Partitioning Event Operator - Target Input: srcpart - Partition key expr: ds - Target column: ds - Target Vertex: Map 5 - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Dynamic Partitioning Event Operator - Target Input: srcpart - Partition key expr: ds - Target column: ds - Target Vertex: Map 8 - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Filter Operator - predicate: _col0 is not null (type: boolean) - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Dynamic Partitioning Event Operator + Target Input: srcpart + Partition key expr: ds + Target column: ds + Target Vertex: Map 1 Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 @@ -3415,34 +3310,22 @@ STAGE PLANS: keys: _col0 (type: string) mode: hash outputColumnNames: _col0 - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Dynamic Partitioning Event Operator - Target Input: srcpart - Partition key expr: ds - Target column: ds - Target Vertex: Map 5 - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Dynamic Partitioning Event Operator - Target Input: srcpart - Partition key expr: ds - Target column: ds - Target Vertex: Map 8 + Dynamic Partitioning Event Operator + Target Input: srcpart + Partition key expr: ds + Target column: ds + Target Vertex: Map 5 + Reducer 2 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Execution mode: vectorized Reducer 4 Reduce Operator 
Tree: Merge Join Operator @@ -3470,32 +3353,55 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Execution mode: vectorized - Reducer 9 + Reducer 8 Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string) + aggregations: max(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Execution mode: vectorized + Filter Operator + predicate: _col0 is not null (type: boolean) + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Dynamic Partitioning Event Operator + Target Input: srcpart + Partition key expr: ds + Target column: ds + Target Vertex: Map 1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Dynamic Partitioning Event Operator + Target Input: srcpart + Partition key expr: ds + Target column: ds + Target Vertex: Map 5 Union 3 Vertex: Union 3 - Union 7 - Vertex: Union 7 + Union 9 + Vertex: Union 9 Stage: Stage-0 Fetch Operator @@ -3559,17 +3465,15 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Map 3 Map Operator Tree: TableScan @@ -3694,7 +3598,7 @@ STAGE PLANS: 1 ds (type: string) outputColumnNames: _col3 input vertices: - 1 Map 4 + 1 Map 3 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -3706,35 +3610,33 @@ STAGE PLANS: 0 _col3 (type: string) 1 hr (type: string) input vertices: - 1 Map 3 + 1 Map 4 Statistics: Num rows: 2420 Data size: 25709 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 2420 Data size: 25709 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Map 3 Map Operator Tree: TableScan - alias: srcpart_hour - filterExpr: (hr is not null and (hour = 11)) (type: boolean) - Statistics: Num rows: 2 Data size: 344 Basic stats: COMPLETE Column stats: NONE + alias: srcpart_date + filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean) + Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (hr is not null and (hour = 11)) (type: boolean) + predicate: (ds is not null and (date = '2008-04-08')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: hr (type: string) + key expressions: ds (type: string) sort order: + - Map-reduce partition columns: hr (type: string) + Map-reduce partition columns: ds (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: hr (type: string) + expressions: ds (type: string) outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator @@ -3744,26 +3646,27 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Dynamic Partitioning Event Operator Target Input: srcpart - Partition key expr: hr + Partition key expr: ds Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Target column: hr + Target column: ds Target Vertex: Map 1 + Execution mode: vectorized Map 4 Map Operator Tree: TableScan - alias: srcpart_date - filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean) - Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE + alias: srcpart_hour + filterExpr: (hr is not null and (hour = 11)) (type: boolean) + Statistics: Num rows: 2 Data size: 344 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ds is not null and (date = '2008-04-08')) (type: boolean) + predicate: (hr is not null and (hour = 11)) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: ds (type: string) + key expressions: hr (type: string) sort order: + - Map-reduce partition columns: ds (type: string) + Map-reduce partition columns: hr (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: ds (type: string) + expressions: hr (type: string) outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator @@ -3773,11 +3676,10 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Dynamic Partitioning Event Operator Target Input: srcpart - Partition key expr: ds + Partition key expr: hr Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Target column: ds + Target column: hr Target Vertex: Map 1 - Execution mode: vectorized Reducer 2 Reduce Operator Tree: Group By Operator @@ -3873,17 +3775,15 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 2200 Data size: 23372 Basic stats: 
COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Map 3 Map Operator Tree: TableScan @@ -4020,17 +3920,15 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Map 3 Map Operator Tree: TableScan @@ -4132,17 +4030,15 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Map 3 Map Operator Tree: TableScan @@ -4254,17 +4150,15 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Map 3 Map Operator Tree: TableScan @@ -4366,7 +4260,7 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 1 <- Reducer 4 (BROADCAST_EDGE) + Map 3 <- Reducer 2 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 4 <- Map 3 (SIMPLE_EDGE) #### A masked pattern was here #### @@ -4375,6 +4269,26 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart + filterExpr: (ds = '2008-04-08') (type: 
boolean) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: '2008-04-08' (type: string) + outputColumnNames: ds + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: ds (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Map 3 + Map Operator Tree: + TableScan + alias: srcpart filterExpr: ds is not null (type: boolean) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -4387,42 +4301,47 @@ STAGE PLANS: 0 ds (type: string) 1 _col0 (type: string) input vertices: - 1 Reducer 4 + 1 Reducer 2 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Map 3 - Map Operator Tree: - TableScan - alias: srcpart - filterExpr: (ds = '2008-04-08') (type: boolean) - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '2008-04-08' (type: string) - outputColumnNames: ds - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: ds (type: string) + aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Dynamic Partitioning Event Operator + Target Input: srcpart + Partition key expr: ds + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Target column: ds + Target Vertex: Map 3 + Reducer 4 + Reduce Operator Tree: + Group By Operator 
aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 @@ -4439,37 +4358,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized - Reducer 4 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Dynamic Partitioning Event Operator - Target Input: srcpart - Partition key expr: ds - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Target column: ds - Target Vertex: Map 1 Stage: Stage-0 Fetch Operator @@ -4607,23 +4495,13 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: srcpart - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: ds (type: string) - sort order: + - Map-reduce partition columns: ds (type: string) - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Map 2 - Map Operator Tree: - TableScan alias: srcpart_date filterExpr: (date = '2008-04-08') (type: boolean) Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE @@ -4640,21 +4518,29 @@ STAGE PLANS: 0 ds (type: string) 1 ds (type: string) input vertices: - 1 Map 1 + 1 Map 3 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Execution mode: vectorized - Reducer 3 + Map 3 + Map Operator Tree: + TableScan + alias: srcpart + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: ds (type: string) + sort order: + + Map-reduce partition columns: ds (type: string) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Reducer 2 Reduce Operator 
Tree: Group By Operator aggregations: count(VALUE._col0) @@ -4806,7 +4692,7 @@ STAGE PLANS: 0 ds (type: string) 1 ds (type: string) input vertices: - 1 Map 4 + 1 Map 3 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -4818,34 +4704,33 @@ STAGE PLANS: 0 '11' (type: string) 1 '11' (type: string) input vertices: - 1 Map 3 + 1 Map 4 Statistics: Num rows: 1210 Data size: 12854 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1210 Data size: 12854 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Map 3 Map Operator Tree: TableScan - alias: srcpart_hour - filterExpr: ((hr is not null and (hour = 11)) and (hr = 11)) (type: boolean) - Statistics: Num rows: 2 Data size: 344 Basic stats: COMPLETE Column stats: NONE + alias: srcpart_date + filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean) + Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((hr is not null and (hour = 11)) and (hr = 11)) (type: boolean) + predicate: (ds is not null and (date = '2008-04-08')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: '11' (type: string) + key expressions: ds (type: string) sort order: + + Map-reduce partition columns: ds (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: '11' (type: string) + expressions: ds (type: string) outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator @@ -4855,26 +4740,26 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Dynamic Partitioning Event Operator Target Input: srcpart - Partition key expr: hr + Partition key expr: ds Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Target column: hr + Target column: ds Target Vertex: Map 1 + Execution mode: vectorized Map 4 Map Operator Tree: TableScan - alias: srcpart_date - filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean) - Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE + alias: srcpart_hour + filterExpr: ((hr is not null and (hour = 11)) and (hr = 11)) (type: boolean) + Statistics: Num rows: 2 Data size: 344 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ds is not null and (date = '2008-04-08')) (type: boolean) + predicate: ((hr is not null and (hour = 11)) and (hr = 11)) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator - key expressions: ds (type: string) + key expressions: '11' (type: string) sort order: + - Map-reduce partition columns: ds (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: ds (type: string) + expressions: 
'11' (type: string) outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator @@ -4884,11 +4769,10 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Dynamic Partitioning Event Operator Target Input: srcpart - Partition key expr: ds + Partition key expr: hr Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Target column: ds + Target column: hr Target Vertex: Map 1 - Execution mode: vectorized Reducer 2 Reduce Operator Tree: Group By Operator @@ -4948,15 +4832,30 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 1 <- Map 4 (BROADCAST_EDGE) - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Map 1 <- Map 2 (BROADCAST_EDGE) + Map 3 <- Map 1 (BROADCAST_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map 2 Map Operator Tree: TableScan + alias: srcpart_date + filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean) + Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ds is not null and (date = '2008-04-08')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: ds (type: string) + sort order: + + Map-reduce partition columns: ds (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Execution mode: vectorized + Map 3 + Map Operator Tree: + TableScan alias: srcpart_hour filterExpr: (hr = 13) (type: boolean) Statistics: Num rows: 2 Data size: 344 Basic stats: COMPLETE Column stats: NONE @@ -4975,33 +4874,16 @@ STAGE PLANS: input vertices: 0 Map 1 Statistics: Num rows: 1 Data size: 189 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1 Data size: 189 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Map 4 - Map Operator Tree: - TableScan - alias: srcpart_date - filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean) - Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (ds is not null and (date = '2008-04-08')) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: ds (type: string) - sort order: + - Map-reduce partition columns: ds (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Execution mode: vectorized - Reducer 3 + value expressions: _col0 (type: bigint) + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -5049,34 +4931,16 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 4 <- Union 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 5 <- Map 4 (SIMPLE_EDGE) - Reducer 7 <- Map 6 (SIMPLE_EDGE), Union 3 (CONTAINS) + Map 1 <- Union 5 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE), 
Union 5 (CONTAINS) + Reducer 7 <- Map 6 (SIMPLE_EDGE), Union 5 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: srcpart - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ds (type: string) - outputColumnNames: ds - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(ds) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Map 4 - Map Operator Tree: - TableScan - alias: srcpart filterExpr: ds is not null (type: boolean) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -5090,23 +4954,19 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col2 input vertices: - 1 Union 3 + 1 Union 5 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string) - outputColumnNames: _col2 + Group By Operator + keys: _col2 (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col2 (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Map 6 + Map 3 Map Operator Tree: TableScan alias: srcpart @@ -5124,41 +4984,25 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Filter Operator - predicate: _col0 is not null (type: boolean) + Map 6 + Map Operator Tree: + TableScan + alias: srcpart + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: string) + expressions: ds (type: string) + outputColumnNames: ds + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(ds) + mode: hash outputColumnNames: _col0 - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Dynamic Partitioning Event Operator - Target Input: srcpart - Partition key expr: ds - Target column: ds - Target Vertex: Map 4 - Reducer 5 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num 
rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reducer 2 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) @@ -5177,7 +5021,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized - Reducer 7 + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0) @@ -5185,9 +5029,14 @@ STAGE PLANS: outputColumnNames: _col0 Filter Operator predicate: _col0 is not null (type: boolean) - Select Operator - expressions: _col0 (type: string) + Group By Operator + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 @@ -5195,24 +5044,41 @@ STAGE PLANS: keys: _col0 (type: string) mode: hash outputColumnNames: _col0 - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Dynamic Partitioning Event Operator - Target Input: srcpart - Partition key expr: ds - Target column: ds - Target Vertex: Map 4 - Union 3 - Vertex: Union 3 + Dynamic Partitioning Event Operator + Target Input: srcpart + Partition key expr: ds + Target column: ds + Target Vertex: Map 1 + Reducer 7 + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Filter Operator + predicate: _col0 is not null (type: boolean) + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Dynamic Partitioning Event Operator + Target Input: srcpart + Partition key expr: ds + Target column: ds + Target Vertex: Map 1 + Union 5 + Vertex: Union 5 Stage: Stage-0 Fetch Operator @@ -5312,17 +5178,15 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 1100 Data size: 103400 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1100 Data size: 103400 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + value expressions: _col0 (type: bigint) Map 3 Map Operator Tree: TableScan diff --git a/ql/src/test/results/clientpositive/tez/vectorized_mapjoin.q.out b/ql/src/test/results/clientpositive/tez/vectorized_mapjoin.q.out index 5aa9599..92e6b06 100644 --- a/ql/src/test/results/clientpositive/tez/vectorized_mapjoin.q.out +++ 
b/ql/src/test/results/clientpositive/tez/vectorized_mapjoin.q.out @@ -14,27 +14,13 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: t2 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: cint is not null (type: boolean) - Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: cint (type: int) - sort order: + - Map-reduce partition columns: cint (type: int) - Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map 2 - Map Operator Tree: - TableScan alias: t1 Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -51,23 +37,33 @@ STAGE PLANS: 1 cint (type: int) outputColumnNames: _col2, _col17 input vertices: - 1 Map 1 + 1 Map 3 Statistics: Num rows: 6758 Data size: 207479 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: int), _col17 (type: int) - outputColumnNames: _col2, _col17 - Statistics: Num rows: 6758 Data size: 207479 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(_col2), max(_col17), min(_col2), avg((_col2 + _col17)) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 + Group By Operator + aggregations: count(_col2), max(_col17), min(_col2), avg((_col2 + _col17)) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: struct<count:bigint,sum:double,input:int>) + value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: struct<count:bigint,sum:double,input:int>) + Execution mode: vectorized + Map 3 + Map Operator Tree: + TableScan + alias: t2 + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: cint is not null (type: boolean) + Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: cint (type: int) + sort order: + + Map-reduce partition columns: cint (type: int) + Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Reducer 3 + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), avg(VALUE._col3) diff --git a/ql/src/test/results/clientpositive/tez/vectorized_nested_mapjoin.q.out b/ql/src/test/results/clientpositive/tez/vectorized_nested_mapjoin.q.out index 83c4bb8..40d18ae 100644 --- a/ql/src/test/results/clientpositive/tez/vectorized_nested_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/tez/vectorized_nested_mapjoin.q.out @@ -10,49 +10,14 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 1 <- Map 4 (BROADCAST_EDGE) - Map 4 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE) + Map 3 <- Map 2 (BROADCAST_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE)
#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: v3 - Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: csmallint is not null (type: boolean) - Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col1} - 1 - keys: - 0 _col0 (type: smallint) - 1 csmallint (type: smallint) - outputColumnNames: _col1 - input vertices: - 0 Map 4 - Statistics: Num rows: 6758 Data size: 207479 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: double) - outputColumnNames: _col1 - Statistics: Num rows: 6758 Data size: 207479 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col1) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: double) - Execution mode: vectorized - Map 3 - Map Operator Tree: - TableScan alias: v1 Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -65,7 +30,7 @@ STAGE PLANS: Statistics: Num rows: 3072 Data size: 94309 Basic stats: COMPLETE Column stats: NONE value expressions: csmallint (type: smallint), cdouble (type: double) Execution mode: vectorized - Map 4 + Map 2 Map Operator Tree: TableScan alias: v2 @@ -84,7 +49,7 @@ STAGE PLANS: 1 ctinyint (type: tinyint) outputColumnNames: _col0, _col1, _col5, _col15 input vertices: - 0 Map 3 + 0 Map 1 Statistics: Num rows: 6758 Data size: 207479 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col0 = _col15) (type: boolean) @@ -100,7 +65,38 @@ STAGE PLANS: Statistics: Num rows: 3379 Data size: 103739 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: double) Execution mode: vectorized - Reducer 2 + Map 3 + Map Operator Tree: + TableScan + alias: v3 + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: csmallint is not null (type: boolean) + Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col1} + 1 + keys: + 0 _col0 (type: smallint) + 1 csmallint (type: smallint) + outputColumnNames: _col1 + input vertices: + 0 Map 2 + Statistics: Num rows: 6758 Data size: 207479 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double) + Execution mode: vectorized + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/tez/vectorized_ptf.q.out b/ql/src/test/results/clientpositive/tez/vectorized_ptf.q.out index c61b5f6..0a76000 100644 --- a/ql/src/test/results/clientpositive/tez/vectorized_ptf.q.out +++ b/ql/src/test/results/clientpositive/tez/vectorized_ptf.q.out @@ -665,18 +665,14 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Position of Big 
Table: 0 Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - auto parallelism: true + tag: -1 + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + auto parallelism: true Reducer 3 Needs Tagging: false Reduce Operator Tree: @@ -1942,27 +1938,24 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 4 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: p1 + alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Filter Operator - isSamplingPred: false - predicate: p_partkey is not null (type: boolean) - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_partkey (type: int) - sort order: + - Map-reduce partition columns: p_partkey (type: int) - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: true + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + auto parallelism: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2011,22 +2004,24 @@ STAGE PLANS: name: default.part_orc name: default.part_orc Truncated Path -> Alias: - /part_orc [p1] - Execution mode: vectorized - Map 3 + /part_orc [part_orc] + Map 4 Map Operator Tree: TableScan - alias: part_orc + alias: p1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Reduce Output Operator - key expressions: p_mfgr (type: string), p_name (type: string) - sort order: ++ - Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 26 
Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) - auto parallelism: true + Filter Operator + isSamplingPred: false + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2075,10 +2070,34 @@ STAGE PLANS: name: default.part_orc name: default.part_orc Truncated Path -> Alias: - /part_orc [part_orc] + /part_orc [p1] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + auto parallelism: true + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: Merge Join Operator condition map: Inner Join 0 to 1 @@ -2113,29 +2132,6 @@ STAGE PLANS: TotalFiles: 1 GatherStats: false MultiFileSpray: false - Reducer 4 - Needs Tagging: false - Reduce Operator Tree: - Extract - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 
8021 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - auto parallelism: true Stage: Stage-0 Fetch Operator @@ -4137,28 +4133,25 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 5 <- Map 4 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: p1 + alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Filter Operator - isSamplingPred: false - predicate: p_partkey is not null (type: boolean) - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_partkey (type: int) - sort order: + - Map-reduce partition columns: p_partkey (type: int) - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: true + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + auto parallelism: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4207,22 +4200,24 @@ STAGE PLANS: name: default.part_orc name: default.part_orc Truncated Path -> Alias: - /part_orc [p1] - Execution mode: vectorized - Map 4 + /part_orc [part_orc] + Map 5 Map Operator Tree: TableScan - alias: part_orc + alias: p1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Reduce Output Operator - key expressions: p_mfgr (type: string), p_name (type: string) - sort order: ++ - Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) - auto parallelism: true + Filter Operator + isSamplingPred: false + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4271,10 +4266,34 @@ STAGE PLANS: name: default.part_orc name: default.part_orc Truncated Path -> Alias: 
- /part_orc [part_orc] + /part_orc [p1] + Execution mode: vectorized Reducer 2 Needs Tagging: false Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + outputColumnNames: _col0, _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + auto parallelism: true + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: Merge Join Operator condition map: Inner Join 0 to 1 @@ -4292,7 +4311,7 @@ STAGE PLANS: tag: -1 value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) auto parallelism: true - Reducer 3 + Reducer 4 Needs Tagging: false Reduce Operator Tree: Extract @@ -4324,29 +4343,6 @@ STAGE PLANS: TotalFiles: 1 GatherStats: false MultiFileSpray: false - Reducer 5 - Needs Tagging: false - Reduce Operator Tree: - Extract - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - outputColumnNames: _col0, _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - auto parallelism: true Stage: Stage-0 Fetch Operator @@ -4823,32 +4819,28 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col0 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double) - auto parallelism: true + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE 
+ Reduce Output Operator + key expressions: _col0 (type: string), _col0 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double) + auto parallelism: true Execution mode: vectorized Reducer 3 Needs Tagging: false Reduce Operator Tree: Extract - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double) auto parallelism: true @@ -4856,19 +4848,19 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), _wcol0 (type: double) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git a/ql/src/test/results/clientpositive/tez/vectorized_shufflejoin.q.out b/ql/src/test/results/clientpositive/tez/vectorized_shufflejoin.q.out index d65e6c0..726ac93 100644 --- a/ql/src/test/results/clientpositive/tez/vectorized_shufflejoin.q.out +++ b/ql/src/test/results/clientpositive/tez/vectorized_shufflejoin.q.out @@ -21,7 +21,7 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: t2 + alias: t1 Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: cint is not null (type: boolean) @@ -35,7 +35,7 @@ STAGE PLANS: Map 4 Map Operator Tree: TableScan - alias: t1 + alias: t2 Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: cint is not null (type: boolean) @@ -56,19 +56,15 @@ STAGE PLANS: 1 {KEY.reducesinkkey0} outputColumnNames: _col2, _col17 Statistics: Num rows: 6758 Data size: 207479 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: int), _col17 (type: int) - outputColumnNames: _col2, _col17 - Statistics: Num rows: 6758 Data size: 207479 Basic stats: COMPLETE Column stats: 
NONE - Group By Operator - aggregations: count(_col2), max(_col17), min(_col2), avg((_col2 + _col17)) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 + Group By Operator + aggregations: count(_col2), max(_col17), min(_col2), avg((_col2 + _col17)) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: struct) + value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: struct) Reducer 3 Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/type_widening.q.out b/ql/src/test/results/clientpositive/type_widening.q.out index f93c81b..4c4a10f 100644 --- a/ql/src/test/results/clientpositive/type_widening.q.out +++ b/ql/src/test/results/clientpositive/type_widening.q.out @@ -54,14 +54,10 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE Union Statistics: Num rows: 1000 Data size: 8000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: bigint) - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + Statistics: Num rows: 1000 Data size: 8000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Statistics: Num rows: 1000 Data size: 8000 Basic stats: COMPLETE Column stats: COMPLETE TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE @@ -71,14 +67,10 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE Union Statistics: Num rows: 1000 Data size: 8000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: bigint) - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + Statistics: Num rows: 1000 Data size: 8000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Statistics: Num rows: 1000 Data size: 8000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint) diff --git a/ql/src/test/results/clientpositive/uber_reduce.q.out b/ql/src/test/results/clientpositive/uber_reduce.q.out index 3888380..2a29131 100644 --- a/ql/src/test/results/clientpositive/uber_reduce.q.out +++ b/ql/src/test/results/clientpositive/uber_reduce.q.out @@ -1,12 +1,12 @@ PREHOOK: query: -- Uberized mode is a YARN option, ignore this test for non-YARN Hadoop versions --- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) CREATE TABLE T1(key STRING, val STRING) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@T1 POSTHOOK: query: -- Uberized mode is a YARN option, ignore this test for non-YARN Hadoop versions --- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S) +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20S) CREATE TABLE T1(key STRING, val STRING) POSTHOOK: type: CREATETABLE diff --git a/ql/src/test/results/clientpositive/udf3.q.out 
b/ql/src/test/results/clientpositive/udf3.q.out index ae4f837..95762fa 100644 --- a/ql/src/test/results/clientpositive/udf3.q.out +++ b/ql/src/test/results/clientpositive/udf3.q.out @@ -43,18 +43,14 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: int), _col4 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 + File Output Operator + compressed: false Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 Stage: Stage-0 Move Operator diff --git a/ql/src/test/results/clientpositive/udf_case_column_pruning.q.out b/ql/src/test/results/clientpositive/udf_case_column_pruning.q.out index bc32a5c..ed0bc54 100644 --- a/ql/src/test/results/clientpositive/udf_case_column_pruning.q.out +++ b/ql/src/test/results/clientpositive/udf_case_column_pruning.q.out @@ -28,7 +28,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -39,7 +39,7 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) diff --git a/ql/src/test/results/clientpositive/udf_explode.q.out b/ql/src/test/results/clientpositive/udf_explode.q.out index 69f47de..faebd4e 100644 --- a/ql/src/test/results/clientpositive/udf_explode.q.out +++ b/ql/src/test/results/clientpositive/udf_explode.q.out @@ -208,24 +208,20 @@ STAGE PLANS: UDTF Operator Statistics: Num rows: 500 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE function name: explode - Select Operator - expressions: col (type: int) - outputColumnNames: col - Statistics: Num rows: 500 Data size: 28000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(1) - keys: col (type: int) - mode: hash - outputColumnNames: _col0, _col1 + Group By Operator + aggregations: count(1) + keys: col (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - tag: -1 - value expressions: _col1 
(type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col1 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -565,24 +561,20 @@ STAGE PLANS: UDTF Operator Statistics: Num rows: 500 Data size: 259500 Basic stats: COMPLETE Column stats: COMPLETE function name: explode - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 259500 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(1) - keys: key (type: int), value (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Group By Operator + aggregations: count(1) + keys: key (type: int), value (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - tag: -1 - value expressions: _col2 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col2 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: diff --git a/ql/src/test/results/clientpositive/udtf_explode.q.out b/ql/src/test/results/clientpositive/udtf_explode.q.out index d3eb300..6a01366 100644 --- a/ql/src/test/results/clientpositive/udtf_explode.q.out +++ b/ql/src/test/results/clientpositive/udtf_explode.q.out @@ -275,33 +275,29 @@ STAGE PLANS: Limit Number of rows: 3 Statistics: Num rows: 3 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Group By Operator - aggregations: count(1) - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types int,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Group By Operator + aggregations: count(1) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types int,bigint + escape.delim \ + 
serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Map Reduce @@ -605,33 +601,29 @@ STAGE PLANS: Limit Number of rows: 3 Statistics: Num rows: 3 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Group By Operator - aggregations: count(1) - keys: _col0 (type: int), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types int,string,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Group By Operator + aggregations: count(1) + keys: _col0 (type: int), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types int,string,bigint + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Map Reduce diff --git a/ql/src/test/results/clientpositive/udtf_json_tuple.q.out b/ql/src/test/results/clientpositive/udtf_json_tuple.q.out index 61becaa..9574179 100644 --- a/ql/src/test/results/clientpositive/udtf_json_tuple.q.out +++ b/ql/src/test/results/clientpositive/udtf_json_tuple.q.out @@ -354,17 +354,13 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 157 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 157 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 9 Data size: 354 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -373,16 +369,16 @@ STAGE PLANS: Reduce Output Operator key 
expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 4 Data size: 157 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9 Data size: 354 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 157 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9 Data size: 354 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 157 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9 Data size: 354 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git a/ql/src/test/results/clientpositive/udtf_stack.q.out b/ql/src/test/results/clientpositive/udtf_stack.q.out index 80edb65..4e9c2fd 100644 --- a/ql/src/test/results/clientpositive/udtf_stack.q.out +++ b/ql/src/test/results/clientpositive/udtf_stack.q.out @@ -21,10 +21,10 @@ STAGE PLANS: Lateral View Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 500 Data size: 134000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Lateral View Join Operator outputColumnNames: _col5, _col6 - Statistics: Num rows: 1000 Data size: 245000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 111000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col5 (type: string), _col6 (type: array) outputColumnNames: _col0, _col1 @@ -48,7 +48,7 @@ STAGE PLANS: function name: stack Lateral View Join Operator outputColumnNames: _col5, _col6 - Statistics: Num rows: 1000 Data size: 245000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 111000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col5 (type: string), _col6 (type: array) outputColumnNames: _col0, _col1 @@ -88,10 +88,10 @@ STAGE PLANS: Lateral View Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 500 Data size: 134000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Lateral View Join Operator outputColumnNames: _col5, _col6 - Statistics: Num rows: 1000 Data size: 269000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 135000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col5 (type: string), _col6 (type: array) outputColumnNames: _col0, _col1 @@ -115,7 +115,7 @@ STAGE PLANS: function name: stack Lateral View Join Operator outputColumnNames: _col5, _col6 - Statistics: Num rows: 1000 Data size: 269000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 135000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col5 (type: string), _col6 (type: array) outputColumnNames: _col0, _col1 @@ -157,3 +157,51 @@ POSTHOOK: Input: default@src #### A masked pattern was here #### x [1] z [4] +PREHOOK: query: EXPLAIN +SELECT stack(1, "en", "dbpedia", NULL ) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT stack(1, 
"en", "dbpedia", NULL ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator + expressions: 1 (type: int), 'en' (type: string), 'dbpedia' (type: string), null (type: void) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE + function name: stack + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT stack(1, "en", "dbpedia", NULL ) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: SELECT stack(1, "en", "dbpedia", NULL ) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +en dbpedia NULL diff --git a/ql/src/test/results/clientpositive/union17.q.out b/ql/src/test/results/clientpositive/union17.q.out index f161d6f..dc53d2a 100644 --- a/ql/src/test/results/clientpositive/union17.q.out +++ b/ql/src/test/results/clientpositive/union17.q.out @@ -151,18 +151,14 @@ STAGE PLANS: mode: final outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 Stage: Stage-0 Move Operator @@ -194,18 +190,14 @@ STAGE PLANS: mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE Column stats: PARTIAL - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE Column stats: PARTIAL - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE Column stats: PARTIAL - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 + table: + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 Stage: Stage-1 Move Operator diff --git a/ql/src/test/results/clientpositive/union18.q.out b/ql/src/test/results/clientpositive/union18.q.out index 15424ea..1a7ac6f 100644 --- a/ql/src/test/results/clientpositive/union18.q.out +++ b/ql/src/test/results/clientpositive/union18.q.out @@ -91,18 +91,14 @@ STAGE PLANS: TableScan Union Statistics: Num rows: 501 Data size: 5584 Basic stats: COMPLETE Column stats: PARTIAL - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 501 Data size: 136272 Basic stats: COMPLETE Column stats: PARTIAL - File Output Operator - compressed: false - Statistics: Num rows: 501 Data size: 136272 Basic stats: COMPLETE Column stats: PARTIAL - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 + File Output Operator + compressed: false + Statistics: Num rows: 501 Data size: 5584 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 Select Operator expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 @@ -124,18 +120,14 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Union Statistics: Num rows: 501 Data size: 5584 Basic stats: COMPLETE Column stats: PARTIAL - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 501 Data size: 136272 Basic stats: COMPLETE Column stats: PARTIAL - File Output Operator - compressed: false - Statistics: Num rows: 501 Data size: 136272 Basic stats: COMPLETE Column stats: PARTIAL - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 + File Output Operator + compressed: false + Statistics: Num rows: 501 Data size: 5584 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 Select Operator expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 diff --git a/ql/src/test/results/clientpositive/union19.q.out b/ql/src/test/results/clientpositive/union19.q.out index 8a566c4..05af3ca 100644 --- a/ql/src/test/results/clientpositive/union19.q.out +++ b/ql/src/test/results/clientpositive/union19.q.out @@ -81,22 +81,18 @@ STAGE PLANS: TableScan Union Statistics: Num rows: 501 Data size: 5584 Basic stats: COMPLETE Column stats: PARTIAL - Select Operator - expressions: _col0 (type: string), _col1 (type: string) + Group By Operator + aggregations: count(_col1) + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 501 Data size: 5584 Basic 
stats: COMPLETE Column stats: PARTIAL - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Select Operator expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 @@ -118,22 +114,18 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Union Statistics: Num rows: 501 Data size: 5584 Basic stats: COMPLETE Column stats: PARTIAL - Select Operator - expressions: _col0 (type: string), _col1 (type: string) + Group By Operator + aggregations: count(_col1) + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 501 Data size: 5584 Basic stats: COMPLETE Column stats: PARTIAL - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Select Operator expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 @@ -153,18 +145,14 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 Stage: Stage-0 Move Operator diff --git a/ql/src/test/results/clientpositive/union20.q.out b/ql/src/test/results/clientpositive/union20.q.out index 1aa82e5..280741e 100644 --- a/ql/src/test/results/clientpositive/union20.q.out +++ b/ql/src/test/results/clientpositive/union20.q.out @@ -28,8 +28,8 @@ ON (unionsrc1.key = 
unionsrc2.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-6 - Stage-6 is a root stage + Stage-2 depends on stages: Stage-1, Stage-4 + Stage-4 is a root stage Stage-0 depends on stages: Stage-2 STAGE PLANS: @@ -37,7 +37,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: s3 + alias: s1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE @@ -80,7 +80,7 @@ STAGE PLANS: Statistics: Num rows: 167 Data size: 2035 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col1 (type: string) TableScan - alias: s4 + alias: s2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key < 10) (type: boolean) @@ -98,7 +98,7 @@ STAGE PLANS: Statistics: Num rows: 167 Data size: 2035 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col1 (type: string) TableScan - alias: s2 + alias: s4 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key < 10) (type: boolean) @@ -145,11 +145,11 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-6 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan - alias: s1 + alias: s3 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE diff --git a/ql/src/test/results/clientpositive/union21.q.out b/ql/src/test/results/clientpositive/union21.q.out index 9f2f667..422ffeb 100644 --- a/ql/src/test/results/clientpositive/union21.q.out +++ b/ql/src/test/results/clientpositive/union21.q.out @@ -51,22 +51,18 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 3070 Basic stats: COMPLETE Column stats: NONE Union Statistics: Num rows: 1522 Data size: 59264 Basic stats: COMPLETE Column stats: PARTIAL - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1522 Data size: 59264 Basic stats: COMPLETE Column stats: PARTIAL - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) TableScan alias: src_thrift Statistics: Num rows: 11 Data size: 3070 Basic stats: COMPLETE Column stats: NONE @@ -76,22 +72,18 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 3070 Basic stats: COMPLETE Column stats: NONE Union Statistics: Num rows: 1522 Data size: 59264 Basic stats: COMPLETE Column stats: PARTIAL - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - 
Statistics: Num rows: 1522 Data size: 59264 Basic stats: COMPLETE Column stats: PARTIAL - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -101,22 +93,18 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Union Statistics: Num rows: 1522 Data size: 59264 Basic stats: COMPLETE Column stats: PARTIAL - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1522 Data size: 59264 Basic stats: COMPLETE Column stats: PARTIAL - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE @@ -126,22 +114,18 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 42500 Basic stats: COMPLETE Column stats: COMPLETE Union Statistics: Num rows: 1522 Data size: 59264 Basic stats: COMPLETE Column stats: PARTIAL - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1522 Data size: 59264 Basic stats: COMPLETE Column stats: PARTIAL - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col1 (type: 
bigint) + value expressions: _col1 (type: bigint) TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -151,22 +135,18 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Union Statistics: Num rows: 1522 Data size: 59264 Basic stats: COMPLETE Column stats: PARTIAL - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1522 Data size: 59264 Basic stats: COMPLETE Column stats: PARTIAL - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/union22.q.out b/ql/src/test/results/clientpositive/union22.q.out index 670aac6..a17cc07 100644 --- a/ql/src/test/results/clientpositive/union22.q.out +++ b/ql/src/test/results/clientpositive/union22.q.out @@ -219,13 +219,13 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-7 is a root stage , consists of Stage-8, Stage-1 - Stage-8 has a backup stage: Stage-1 + Stage-7 is a root stage , consists of Stage-8, Stage-4 + Stage-8 has a backup stage: Stage-4 Stage-6 depends on stages: Stage-8 - Stage-2 depends on stages: Stage-1, Stage-6 + Stage-2 depends on stages: Stage-4, Stage-6 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 - Stage-1 + Stage-4 STAGE PLANS: Stage: Stage-7 @@ -454,43 +454,6 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - GatherStats: false - Union - Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Static Partition Specification: ds=2/ - Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns k1,k2,k3,k4 - columns.comments - columns.types string:string:string:string -#### A masked pattern was here #### - name default.dst_union22 - partition_columns ds - partition_columns.types string - serialization.ddl struct dst_union22 { string k1, string k2, string k3, string k4} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dst_union22 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - TableScan alias: dst_union22_delta Statistics: Num rows: 500 Data size: 16936 Basic stats: COMPLETE Column stats: NONE GatherStats: false @@ -504,39 +467,68 @@ STAGE PLANS: Statistics: Num rows: 166 Data size: 5622 Basic stats: COMPLETE Column stats: NONE Union Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Static Partition Specification: ds=2/ Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Static Partition Specification: ds=2/ - Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns k1,k2,k3,k4 + columns.comments + columns.types string:string:string:string #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns k1,k2,k3,k4 - columns.comments - columns.types string:string:string:string + name default.dst_union22 + partition_columns ds + partition_columns.types string + serialization.ddl struct dst_union22 { string k1, string k2, string k3, string k4} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### - name default.dst_union22 - partition_columns ds - partition_columns.types string - serialization.ddl struct dst_union22 { string k1, string k2, string k3, string k4} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dst_union22 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + TableScan + GatherStats: false + Union + Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Static Partition Specification: ds=2/ + Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns k1,k2,k3,k4 + columns.comments + columns.types string:string:string:string #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dst_union22 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + name default.dst_union22 + partition_columns ds + partition_columns.types string + serialization.ddl struct dst_union22 { string k1, string k2, string k3, string k4} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked 
pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dst_union22 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -639,7 +631,7 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### - Stage: Stage-1 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan diff --git a/ql/src/test/results/clientpositive/union23.q.out b/ql/src/test/results/clientpositive/union23.q.out index 598ed2b..ca5ab25 100644 --- a/ql/src/test/results/clientpositive/union23.q.out +++ b/ql/src/test/results/clientpositive/union23.q.out @@ -31,23 +31,6 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Transform Operator command: cat output info: @@ -65,6 +48,23 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Union + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) diff --git a/ql/src/test/results/clientpositive/union24.q.out b/ql/src/test/results/clientpositive/union24.q.out index 1decd24..0d2d949 100644 --- a/ql/src/test/results/clientpositive/union24.q.out +++ b/ql/src/test/results/clientpositive/union24.q.out @@ -180,12 +180,12 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-2 depends on stages: Stage-3 + Stage-5 is a root stage + Stage-2 depends on stages: Stage-5 Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -196,24 +196,20 @@ STAGE PLANS: isSamplingPred: false predicate: (key < 10) (type: boolean) Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key 
(type: string) - outputColumnNames: key + Group By Operator + aggregations: count(1) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: bigint) - auto parallelism: false + tag: -1 + value expressions: _col1 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -271,33 +267,29 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 51 Data size: 244 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 51 Data size: 244 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types string,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types string,bigint + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Map Reduce Map Operator Tree: TableScan - alias: src4 + alias: src2 Statistics: Num rows: 309 Data size: 1482 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator @@ -336,35 +328,6 @@ STAGE PLANS: GatherStats: false MultiFileSpray: false TableScan - GatherStats: false - Union - Statistics: Num rows: 360 Data size: 1726 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 360 Data size: 1726 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 360 Data size: 1726 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1 - columns.types string:bigint - 
escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - TableScan alias: src3 Statistics: Num rows: 309 Data size: 1482 Basic stats: COMPLETE Column stats: NONE GatherStats: false @@ -404,7 +367,7 @@ STAGE PLANS: GatherStats: false MultiFileSpray: false TableScan - alias: src2 + alias: src4 Statistics: Num rows: 309 Data size: 1482 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator @@ -442,6 +405,35 @@ STAGE PLANS: TotalFiles: 1 GatherStats: false MultiFileSpray: false + TableScan + GatherStats: false + Union + Statistics: Num rows: 360 Data size: 1726 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 360 Data size: 1726 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 360 Data size: 1726 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -793,16 +785,16 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 + Stage-4 is a root stage + Stage-2 depends on stages: Stage-4 Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 309 Data size: 1482 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator @@ -814,11 +806,10 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: count (type: bigint) + tag: 0 auto parallelism: false TableScan - alias: a + alias: b Statistics: Num rows: 309 Data size: 1482 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator @@ -830,7 +821,8 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE - tag: 0 + tag: 1 + value expressions: count (type: bigint) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -962,35 +954,6 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - GatherStats: false - Union - Statistics: Num rows: 319 Data size: 1531 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 319 Data size: 1531 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 319 
Data size: 1531 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1 - columns.types string:bigint - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - TableScan alias: src2 Statistics: Num rows: 309 Data size: 1482 Basic stats: COMPLETE Column stats: NONE GatherStats: false @@ -1068,6 +1031,35 @@ STAGE PLANS: TotalFiles: 1 GatherStats: false MultiFileSpray: false + TableScan + GatherStats: false + Union + Statistics: Num rows: 319 Data size: 1531 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 319 Data size: 1531 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 319 Data size: 1531 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1368,17 +1360,17 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 + Stage-4 is a root stage + Stage-5 depends on stages: Stage-4 + Stage-2 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 309 Data size: 1482 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator @@ -1390,10 +1382,10 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE - tag: 1 + tag: 0 auto parallelism: false TableScan - alias: a + alias: b Statistics: Num rows: 309 Data size: 1482 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator @@ -1405,7 +1397,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 103 Data size: 494 Basic stats: COMPLETE Column stats: NONE - tag: 0 + tag: 1 auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -1511,35 +1503,31 @@ STAGE PLANS: 1 outputColumnNames: _col0 Statistics: Num rows: 113 Data size: 543 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 113 Data size: 543 Basic stats: COMPLETE Column stats: 
NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 113 Data size: 543 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types string,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types string,bigint + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false - Stage: Stage-2 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -1585,61 +1573,28 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 56 Data size: 269 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 56 Data size: 269 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types string,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types string,bigint + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false - Stage: Stage-3 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan - GatherStats: false - Union - Statistics: Num rows: 262 Data size: 1257 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 262 Data size: 1257 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 262 Data size: 1257 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: 
org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1 - columns.types string:bigint - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - TableScan alias: src2 Statistics: Num rows: 309 Data size: 1482 Basic stats: COMPLETE Column stats: NONE GatherStats: false @@ -1717,6 +1672,35 @@ STAGE PLANS: TotalFiles: 1 GatherStats: false MultiFileSpray: false + TableScan + GatherStats: false + Union + Statistics: Num rows: 262 Data size: 1257 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 262 Data size: 1257 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 262 Data size: 1257 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: diff --git a/ql/src/test/results/clientpositive/union25.q.out b/ql/src/test/results/clientpositive/union25.q.out index 1c9e2fb..f314f9f 100644 --- a/ql/src/test/results/clientpositive/union25.q.out +++ b/ql/src/test/results/clientpositive/union25.q.out @@ -57,14 +57,14 @@ FROM ) a GROUP BY key, value POSTHOOK: type: CREATETABLE_AS_SELECT STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 + Stage-4 is a root stage + Stage-2 depends on stages: Stage-4 Stage-0 depends on stages: Stage-2 Stage-6 depends on stages: Stage-0 Stage-3 depends on stages: Stage-6 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -134,11 +134,13 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - Union - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Union Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) @@ -153,30 +155,20 @@ STAGE PLANS: Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: 
_col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Union + Union + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: bigint) + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/union26.q.out b/ql/src/test/results/clientpositive/union26.q.out index 4f9e0d4..ea557d0 100644 --- a/ql/src/test/results/clientpositive/union26.q.out +++ b/ql/src/test/results/clientpositive/union26.q.out @@ -47,16 +47,16 @@ WHERE ds='2008-04-08' and hr='11' group by key, value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-2 depends on stages: Stage-3 + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -66,8 +66,9 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -77,7 +78,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) Reduce Operator Tree: Join Operator condition map: @@ -87,21 +87,32 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: 
false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce Map Operator Tree: TableScan + Union + Statistics: Num rows: 1275 Data size: 13545 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1275 Data size: 13545 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1275 Data size: 13545 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + TableScan alias: srcpart Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Lateral View Forward @@ -119,22 +130,18 @@ STAGE PLANS: Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Union Statistics: Num rows: 1275 Data size: 13545 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1275 Data size: 13545 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 1275 Data size: 13545 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1275 Data size: 13545 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: bigint) + value expressions: _col2 (type: bigint) Select Operator expressions: array(1,2,3) (type: array) outputColumnNames: _col0 @@ -151,41 +158,18 @@ STAGE PLANS: Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Union Statistics: Num rows: 1275 Data size: 13545 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1275 Data size: 13545 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 1275 Data size: 13545 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: 
_col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1275 Data size: 13545 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: bigint) - TableScan - Union - Statistics: Num rows: 1275 Data size: 13545 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1275 Data size: 13545 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1275 Data size: 13545 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1275 Data size: 13545 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: bigint) + value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/union3.q.out b/ql/src/test/results/clientpositive/union3.q.out index 87b4035..353434a 100644 --- a/ql/src/test/results/clientpositive/union3.q.out +++ b/ql/src/test/results/clientpositive/union3.q.out @@ -40,13 +40,13 @@ FROM ( POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-3, Stage-5, Stage-7 - Stage-3 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2, Stage-5, Stage-6, Stage-7 Stage-4 is a root stage Stage-5 depends on stages: Stage-4 Stage-6 is a root stage - Stage-7 depends on stages: Stage-6 - Stage-0 depends on stages: Stage-2 + Stage-7 is a root stage + Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -64,26 +64,45 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Reduce Operator Tree: - Select Operator + Limit + Number of rows: 1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Limit - Number of rows: 1 + Select Operator + expressions: 1 (type: int) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - expressions: 4 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce Map Operator Tree: TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + File Output Operator + compressed: false 
+ table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan Union Statistics: Num rows: 4 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Select Operator @@ -140,37 +159,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Reduce Operator Tree: - Select Operator - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Limit - Number of rows: 1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - expressions: 3 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-4 Map Reduce Map Operator Tree: @@ -186,21 +174,19 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Reduce Operator Tree: - Select Operator + Limit + Number of rows: 1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Limit - Number of rows: 1 + Select Operator + expressions: 2 (type: int) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-5 Map Reduce @@ -238,42 +224,48 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Reduce Operator Tree: - Select Operator + Limit + Number of rows: 1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Limit - Number of rows: 1 + Select Operator + expressions: 3 (type: int) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - expressions: 2 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - File Output Operator - 
compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-7 Map Reduce Map Operator Tree: TableScan - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 + Limit + Number of rows: 1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Select Operator + expressions: 4 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/union30.q.out b/ql/src/test/results/clientpositive/union30.q.out index 59934c6..235b64a 100644 --- a/ql/src/test/results/clientpositive/union30.q.out +++ b/ql/src/test/results/clientpositive/union30.q.out @@ -47,20 +47,20 @@ select key, value from src ) aa POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-10 - Stage-3 depends on stages: Stage-2 - Stage-9 depends on stages: Stage-3 , consists of Stage-6, Stage-5, Stage-7 - Stage-6 - Stage-0 depends on stages: Stage-6, Stage-5, Stage-8 - Stage-4 depends on stages: Stage-0 + Stage-9 is a root stage + Stage-10 depends on stages: Stage-9, Stage-11 + Stage-2 depends on stages: Stage-10 + Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 Stage-5 - Stage-7 - Stage-8 depends on stages: Stage-7 - Stage-10 is a root stage + Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-3 depends on stages: Stage-0 + Stage-4 + Stage-6 + Stage-7 depends on stages: Stage-6 + Stage-11 is a root stage STAGE PLANS: - Stage: Stage-1 + Stage: Stage-9 Map Reduce Map Operator Tree: TableScan @@ -100,7 +100,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-10 Map Reduce Map Operator Tree: TableScan @@ -130,25 
+130,10 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan - Union - Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.union_subq_union - TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -190,11 +175,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union + TableScan + Union + Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.union_subq_union - Stage: Stage-9 + Stage: Stage-8 Conditional Operator - Stage: Stage-6 + Stage: Stage-5 Move Operator files: hdfs directory: true @@ -210,10 +210,10 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union - Stage: Stage-4 + Stage: Stage-3 Stats-Aggr Operator - Stage: Stage-5 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -225,7 +225,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union - Stage: Stage-7 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -237,13 +237,13 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union - Stage: Stage-8 + Stage: Stage-7 Move Operator files: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-10 + Stage: Stage-11 Map Reduce Map Operator Tree: TableScan diff --git a/ql/src/test/results/clientpositive/union31.q.out b/ql/src/test/results/clientpositive/union31.q.out index faf17f4..9b970c0 100644 --- a/ql/src/test/results/clientpositive/union31.q.out +++ b/ql/src/test/results/clientpositive/union31.q.out @@ -81,7 +81,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: t2 + alias: t1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -122,7 +122,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe TableScan - alias: t1 + alias: t2 Statistics: Num rows: 10 Data size: 70 Basic stats: 
COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -266,10 +266,10 @@ POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 POSTHOOK: Output: default@t3 POSTHOOK: Output: default@t4 -POSTHOOK: Lineage: t3.cnt EXPRESSION [(t2)t2.null, (t1)t1.null, ] -POSTHOOK: Lineage: t3.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] -POSTHOOK: Lineage: t4.cnt EXPRESSION [(t2)t2.null, (t1)t1.null, ] -POSTHOOK: Lineage: t4.value EXPRESSION [(t2)t2.FieldSchema(name:value, type:string, comment:null), (t1)t1.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: t3.cnt EXPRESSION [(t1)t1.null, (t2)t2.null, ] +POSTHOOK: Lineage: t3.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t2)t2.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: t4.cnt EXPRESSION [(t1)t1.null, (t2)t2.null, ] +POSTHOOK: Lineage: t4.value EXPRESSION [(t1)t1.FieldSchema(name:value, type:string, comment:null), (t2)t2.FieldSchema(name:value, type:string, comment:null), ] PREHOOK: query: select * from t3 PREHOOK: type: QUERY PREHOOK: Input: default@t3 @@ -352,7 +352,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: t2 + alias: t1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) @@ -377,16 +377,12 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Map Reduce @@ -394,29 +390,21 @@ STAGE PLANS: TableScan Union Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) TableScan Union Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key 
expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Reduce Operator Tree: Forward Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE @@ -487,7 +475,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: t1 + alias: t2 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) @@ -512,16 +500,12 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe PREHOOK: query: from ( @@ -553,10 +537,10 @@ POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 POSTHOOK: Output: default@t5 POSTHOOK: Output: default@t6 -POSTHOOK: Lineage: t5.c1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] -POSTHOOK: Lineage: t5.cnt EXPRESSION [(t2)t2.null, (t1)t1.null, ] -POSTHOOK: Lineage: t6.c1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] -POSTHOOK: Lineage: t6.cnt EXPRESSION [(t2)t2.null, (t1)t1.null, ] +POSTHOOK: Lineage: t5.c1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t2)t2.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: t5.cnt EXPRESSION [(t1)t1.null, (t2)t2.null, ] +POSTHOOK: Lineage: t6.c1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t2)t2.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: t6.cnt EXPRESSION [(t1)t1.null, (t2)t2.null, ] PREHOOK: query: select * from t5 PREHOOK: type: QUERY PREHOOK: Input: default@t5 @@ -662,15 +646,15 @@ insert overwrite table t8 select c1, count(1) group by c1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-3 depends on stages: Stage-6 + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-6 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -714,6 +698,18 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan + Union + Statistics: Num rows: 11 Data size: 53 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 53 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num 
rows: 11 Data size: 53 Basic stats: COMPLETE Column stats: NONE + TableScan alias: t2 Statistics: Num rows: 6 Data size: 18 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -731,18 +727,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 11 Data size: 53 Basic stats: COMPLETE Column stats: NONE - TableScan - Union - Statistics: Num rows: 11 Data size: 53 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 53 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 11 Data size: 53 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Forward Statistics: Num rows: 11 Data size: 53 Basic stats: COMPLETE Column stats: NONE @@ -839,10 +823,10 @@ POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 POSTHOOK: Output: default@t7 POSTHOOK: Output: default@t8 -POSTHOOK: Lineage: t7.c1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] -POSTHOOK: Lineage: t7.cnt EXPRESSION [(t2)t2.null, (t1)t1.null, ] -POSTHOOK: Lineage: t8.c1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] -POSTHOOK: Lineage: t8.cnt EXPRESSION [(t2)t2.null, (t1)t1.null, ] +POSTHOOK: Lineage: t7.c1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t2)t2.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: t7.cnt EXPRESSION [(t1)t1.null, (t2)t2.null, ] +POSTHOOK: Lineage: t8.c1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t2)t2.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: t8.cnt EXPRESSION [(t1)t1.null, (t2)t2.null, ] PREHOOK: query: select * from t7 PREHOOK: type: QUERY PREHOOK: Input: default@t7 diff --git a/ql/src/test/results/clientpositive/union32.q.out b/ql/src/test/results/clientpositive/union32.q.out index 046a9fc..ce38511 100644 --- a/ql/src/test/results/clientpositive/union32.q.out +++ b/ql/src/test/results/clientpositive/union32.q.out @@ -61,14 +61,10 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Union Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: double) - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE TableScan alias: t2 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE @@ -78,14 +74,10 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Union Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: double) - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Statistics: Num 
rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: double) @@ -156,16 +148,16 @@ UNION ALL SELECT CAST(key AS DOUBLE) AS key FROM t2) a POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-2 depends on stages: Stage-3 + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -176,7 +168,7 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE TableScan - alias: a + alias: b Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -210,6 +202,20 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan + Union + Statistics: Num rows: 15 Data size: 108 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 15 Data size: 108 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 15 Data size: 108 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TableScan alias: t2 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -229,20 +235,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TableScan - Union - Statistics: Num rows: 15 Data size: 108 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 15 Data size: 108 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 15 Data size: 108 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -313,12 +305,12 @@ UNION ALL SELECT CAST(a.key AS BIGINT) AS key FROM t1 a JOIN t2 b ON a.key = b.key) a POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-2 depends on stages: Stage-3 Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -367,20 +359,6 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - Union - Statistics: Num rows: 15 Data size: 108 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 15 Data size: 108 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 15 Data size: 108 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - 
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TableScan alias: t2 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -400,6 +378,20 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TableScan + Union + Statistics: Num rows: 15 Data size: 108 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 15 Data size: 108 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 15 Data size: 108 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -470,16 +462,16 @@ UNION ALL SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2) a POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-2 depends on stages: Stage-3 + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -490,7 +482,7 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE TableScan - alias: a + alias: b Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -524,6 +516,20 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan + Union + Statistics: Num rows: 15 Data size: 108 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: double), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 108 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 15 Data size: 108 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TableScan alias: t2 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -543,20 +549,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TableScan - Union - Statistics: Num rows: 15 Data size: 108 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: double), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 15 Data size: 108 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 15 Data size: 108 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -627,12 +619,12 @@ UNION ALL SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 + Stage-3 is a root stage + Stage-2 depends on stages: Stage-3 Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -681,20 +673,6 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - Union - Statistics: Num rows: 15 Data size: 108 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: double), _col1 (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 15 Data size: 108 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 15 Data size: 108 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TableScan alias: t2 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -714,6 +692,20 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TableScan + Union + Statistics: Num rows: 15 Data size: 108 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: double), _col1 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 108 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 15 Data size: 108 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/union33.q.out b/ql/src/test/results/clientpositive/union33.q.out index 911b3e9..0d9c261 100644 --- a/ql/src/test/results/clientpositive/union33.q.out +++ b/ql/src/test/results/clientpositive/union33.q.out @@ -124,33 +124,25 @@ STAGE PLANS: Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Union Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_src + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_src TableScan Union 
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_src + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_src Stage: Stage-8 Conditional Operator @@ -338,18 +330,14 @@ STAGE PLANS: TableScan Union Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_src + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_src TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -362,18 +350,14 @@ STAGE PLANS: Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Union Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_src + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_src Stage: Stage-9 Conditional Operator diff --git a/ql/src/test/results/clientpositive/union34.q.out b/ql/src/test/results/clientpositive/union34.q.out index 0ab57d6..2f45fca 100644 --- a/ql/src/test/results/clientpositive/union34.q.out +++ b/ql/src/test/results/clientpositive/union34.q.out @@ -122,15 +122,11 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Union Statistics: Num rows: 25 Data size: 265 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 
(type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + Statistics: Num rows: 25 Data size: 265 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 25 Data size: 265 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + value expressions: _col1 (type: string) TableScan alias: src10_4 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE @@ -140,15 +136,11 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Union Statistics: Num rows: 25 Data size: 265 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + Statistics: Num rows: 25 Data size: 265 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 25 Data size: 265 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + value expressions: _col1 (type: string) TableScan alias: src10_2 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE @@ -170,21 +162,13 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE - Union + Union + Statistics: Num rows: 25 Data size: 265 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + Statistics: Num rows: 25 Data size: 265 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 265 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 25 Data size: 265 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + value expressions: _col1 (type: string) Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -290,36 +274,36 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: src10_2 + alias: src10_1 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) TableScan - alias: src10_1 + alias: src10_2 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num 
rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -329,16 +313,12 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1 Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -346,15 +326,11 @@ STAGE PLANS: TableScan Union Statistics: Num rows: 25 Data size: 265 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + Statistics: Num rows: 25 Data size: 265 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 25 Data size: 265 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + value expressions: _col1 (type: string) TableScan alias: src10_3 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE @@ -364,15 +340,11 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Union Statistics: Num rows: 25 Data size: 265 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + Statistics: Num rows: 25 Data size: 265 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 25 Data size: 265 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + value expressions: _col1 (type: string) TableScan alias: src10_4 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE @@ -382,15 +354,11 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Union Statistics: Num rows: 25 Data size: 265 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + Statistics: Num rows: 25 Data size: 265 Basic stats: COMPLETE Column stats: NONE - 
Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 25 Data size: 265 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + value expressions: _col1 (type: string) Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) diff --git a/ql/src/test/results/clientpositive/union6.q.out b/ql/src/test/results/clientpositive/union6.q.out index a8427fb..d5fe7ca 100644 --- a/ql/src/test/results/clientpositive/union6.q.out +++ b/ql/src/test/results/clientpositive/union6.q.out @@ -76,18 +76,14 @@ STAGE PLANS: TableScan Union Statistics: Num rows: 26 Data size: 463 Basic stats: COMPLETE Column stats: PARTIAL - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 7072 Basic stats: COMPLETE Column stats: PARTIAL - File Output Operator - compressed: false - Statistics: Num rows: 26 Data size: 7072 Basic stats: COMPLETE Column stats: PARTIAL - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.tmptable + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 463 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tmptable TableScan alias: s2 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -97,18 +93,14 @@ STAGE PLANS: Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Union Statistics: Num rows: 26 Data size: 463 Basic stats: COMPLETE Column stats: PARTIAL - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 7072 Basic stats: COMPLETE Column stats: PARTIAL - File Output Operator - compressed: false - Statistics: Num rows: 26 Data size: 7072 Basic stats: COMPLETE Column stats: PARTIAL - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.tmptable + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 463 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tmptable Stage: Stage-8 Conditional Operator diff --git a/ql/src/test/results/clientpositive/union_lateralview.q.out b/ql/src/test/results/clientpositive/union_lateralview.q.out index e98298a..bf532b6 100644 --- a/ql/src/test/results/clientpositive/union_lateralview.q.out +++ b/ql/src/test/results/clientpositive/union_lateralview.q.out @@ -54,14 +54,6 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE - TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -110,6 +102,14 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string) TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TableScan alias: srcpart Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -238,7 +238,7 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 POSTHOOK: Output: default@test_union_lateral_view POSTHOOK: Lineage: test_union_lateral_view.arr_ele EXPRESSION [] POSTHOOK: Lineage: test_union_lateral_view.key EXPRESSION [(src)b.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: test_union_lateral_view.value EXPRESSION [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_union_lateral_view.value EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] PREHOOK: query: select key, arr_ele, value from test_union_lateral_view order by key, arr_ele limit 20 PREHOOK: type: QUERY PREHOOK: Input: default@test_union_lateral_view diff --git a/ql/src/test/results/clientpositive/union_remove_1.q.out b/ql/src/test/results/clientpositive/union_remove_1.q.out index 62bc729..51525f7 100644 --- a/ql/src/test/results/clientpositive/union_remove_1.q.out +++ b/ql/src/test/results/clientpositive/union_remove_1.q.out @@ -95,18 +95,14 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 Stage: Stage-0 Move Operator @@ -147,18 +143,14 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 PREHOOK: query: insert overwrite table outputTbl1 SELECT * diff --git a/ql/src/test/results/clientpositive/union_remove_10.q.out b/ql/src/test/results/clientpositive/union_remove_10.q.out index 344ed9c..9ae4571 100644 --- a/ql/src/test/results/clientpositive/union_remove_10.q.out +++ b/ql/src/test/results/clientpositive/union_remove_10.q.out @@ -95,40 +95,17 @@ STAGE PLANS: alias: inputtbl1 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: key + expressions: key (type: string), UDFToLong(1) (type: bigint) + outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col1 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.outputtbl1 + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.outputtbl1 Stage: Stage-6 Conditional Operator @@ -176,17 +153,36 @@ STAGE PLANS: alias: inputtbl1 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: key (type: string), UDFToLong(1) (type: bigint) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: key Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.outputtbl1 + Group By Operator + aggregations: count(1) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + 
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.outputtbl1 Stage: Stage-8 Map Reduce diff --git a/ql/src/test/results/clientpositive/union_remove_11.q.out b/ql/src/test/results/clientpositive/union_remove_11.q.out index 6db85b6..23ab7c6 100644 --- a/ql/src/test/results/clientpositive/union_remove_11.q.out +++ b/ql/src/test/results/clientpositive/union_remove_11.q.out @@ -93,7 +93,7 @@ STAGE PLANS: alias: inputtbl1 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: key (type: string), 2 (type: int) + expressions: key (type: string), 1 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Union @@ -114,7 +114,7 @@ STAGE PLANS: alias: inputtbl1 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: key (type: string), 1 (type: int) + expressions: key (type: string), 2 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Union diff --git a/ql/src/test/results/clientpositive/union_remove_12.q.out b/ql/src/test/results/clientpositive/union_remove_12.q.out index 2ffe1a5..8f2f8b0 100644 --- a/ql/src/test/results/clientpositive/union_remove_12.q.out +++ b/ql/src/test/results/clientpositive/union_remove_12.q.out @@ -69,8 +69,8 @@ FROM inputTbl1 a join inputTbl1 b on a.key=b.key )c POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-8 is a root stage - Stage-6 depends on stages: Stage-8, Stage-9 , consists of Stage-3, Stage-2, Stage-4 + Stage-1 is a root stage + Stage-6 depends on stages: Stage-1, Stage-9 , consists of Stage-3, Stage-2, Stage-4 Stage-3 Stage-0 depends on stages: Stage-3, Stage-2, Stage-5 Stage-2 @@ -80,7 +80,7 @@ STAGE DEPENDENCIES: Stage-9 depends on stages: Stage-10 STAGE PLANS: - Stage: Stage-8 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -217,7 +217,7 @@ FROM inputTbl1 a join inputTbl1 b on a.key=b.key POSTHOOK: type: QUERY POSTHOOK: Input: default@inputtbl1 POSTHOOK: Output: default@outputtbl1 -POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)a.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)a.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)b.FieldSchema(name:val, type:string, comment:null), ] PREHOOK: query: desc formatted outputTbl1 PREHOOK: type: DESCTABLE diff --git 
a/ql/src/test/results/clientpositive/union_remove_13.q.out b/ql/src/test/results/clientpositive/union_remove_13.q.out index 3e129e7..c20e5e1 100644 --- a/ql/src/test/results/clientpositive/union_remove_13.q.out +++ b/ql/src/test/results/clientpositive/union_remove_13.q.out @@ -69,8 +69,8 @@ FROM inputTbl1 a join inputTbl1 b on a.key=b.key )c POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-8 is a root stage - Stage-6 depends on stages: Stage-8, Stage-9 , consists of Stage-3, Stage-2, Stage-4 + Stage-1 is a root stage + Stage-6 depends on stages: Stage-1, Stage-9 , consists of Stage-3, Stage-2, Stage-4 Stage-3 Stage-0 depends on stages: Stage-3, Stage-2, Stage-5 Stage-2 @@ -80,7 +80,7 @@ STAGE DEPENDENCIES: Stage-9 depends on stages: Stage-10 STAGE PLANS: - Stage: Stage-8 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -240,8 +240,8 @@ FROM inputTbl1 a join inputTbl1 b on a.key=b.key POSTHOOK: type: QUERY POSTHOOK: Input: default@inputtbl1 POSTHOOK: Output: default@outputtbl1 -POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)a.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ] -POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)b.FieldSchema(name:val, type:string, comment:null), (inputtbl1)inputtbl1.null, ] +POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)a.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)b.FieldSchema(name:val, type:string, comment:null), ] PREHOOK: query: desc formatted outputTbl1 PREHOOK: type: DESCTABLE PREHOOK: Input: default@outputtbl1 diff --git a/ql/src/test/results/clientpositive/union_remove_14.q.out b/ql/src/test/results/clientpositive/union_remove_14.q.out index 29d8ac1..ccecf36 100644 --- a/ql/src/test/results/clientpositive/union_remove_14.q.out +++ b/ql/src/test/results/clientpositive/union_remove_14.q.out @@ -71,8 +71,8 @@ FROM inputTbl1 a join inputTbl1 b on a.key=b.key )c POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-8 is a root stage - Stage-6 depends on stages: Stage-8, Stage-9 , consists of Stage-3, Stage-2, Stage-4 + Stage-1 is a root stage + Stage-6 depends on stages: Stage-1, Stage-9 , consists of Stage-3, Stage-2, Stage-4 Stage-3 Stage-0 depends on stages: Stage-3, Stage-2, Stage-5 Stage-2 @@ -82,7 +82,7 @@ STAGE DEPENDENCIES: Stage-9 depends on stages: Stage-10 STAGE PLANS: - Stage: Stage-8 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -219,7 +219,7 @@ FROM inputTbl1 a join inputTbl1 b on a.key=b.key POSTHOOK: type: QUERY POSTHOOK: Input: default@inputtbl1 POSTHOOK: Output: default@outputtbl1 -POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)a.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)a.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)b.FieldSchema(name:val, type:string, comment:null), ] PREHOOK: query: desc formatted outputTbl1 PREHOOK: type: DESCTABLE diff --git a/ql/src/test/results/clientpositive/union_remove_15.q.out b/ql/src/test/results/clientpositive/union_remove_15.q.out index f37b098..4e60272 100644 --- a/ql/src/test/results/clientpositive/union_remove_15.q.out +++ 
b/ql/src/test/results/clientpositive/union_remove_15.q.out @@ -102,7 +102,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), '2' (type: string) + expressions: _col0 (type: string), _col1 (type: bigint), '1' (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator @@ -156,7 +156,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), '1' (type: string) + expressions: _col0 (type: string), _col1 (type: bigint), '2' (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/union_remove_16.q.out b/ql/src/test/results/clientpositive/union_remove_16.q.out index 3e84c7e..19417c2 100644 --- a/ql/src/test/results/clientpositive/union_remove_16.q.out +++ b/ql/src/test/results/clientpositive/union_remove_16.q.out @@ -105,7 +105,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), '2' (type: string) + expressions: _col0 (type: string), _col1 (type: bigint), '1' (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator @@ -188,7 +188,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), '1' (type: string) + expressions: _col0 (type: string), _col1 (type: bigint), '2' (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/union_remove_17.q.out b/ql/src/test/results/clientpositive/union_remove_17.q.out index 5b466c6..386b023 100644 --- a/ql/src/test/results/clientpositive/union_remove_17.q.out +++ b/ql/src/test/results/clientpositive/union_remove_17.q.out @@ -72,7 +72,7 @@ STAGE PLANS: alias: inputtbl1 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: key (type: string), 2 (type: int), '2' (type: string) + expressions: key (type: string), 1 (type: int), '1' (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Union @@ -93,7 +93,7 @@ STAGE PLANS: alias: inputtbl1 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: key (type: string), 1 (type: int), '1' (type: string) + expressions: key (type: string), 2 (type: int), '2' (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Union diff --git a/ql/src/test/results/clientpositive/union_remove_19.q.out b/ql/src/test/results/clientpositive/union_remove_19.q.out index a030c24..1f9bb35 100644 --- a/ql/src/test/results/clientpositive/union_remove_19.q.out +++ b/ql/src/test/results/clientpositive/union_remove_19.q.out @@ -99,18 +99,14 @@ STAGE PLANS: mode: mergepartial 
outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 Stage: Stage-0 Move Operator @@ -151,18 +147,14 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 PREHOOK: query: insert overwrite table outputTbl1 SELECT a.key, a.values @@ -466,18 +458,14 @@ STAGE PLANS: Filter Operator predicate: (_col0 >= 7.0) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: double), _col1 (type: bigint) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 Stage: Stage-0 Move Operator @@ -525,18 +513,14 @@ STAGE PLANS: Filter Operator predicate: (_col0 >= 7.0) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: double), _col1 (type: bigint) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat 
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 PREHOOK: query: insert overwrite table outputTbl1 select key, values from diff --git a/ql/src/test/results/clientpositive/union_remove_2.q.out b/ql/src/test/results/clientpositive/union_remove_2.q.out index e407139..899dadb 100644 --- a/ql/src/test/results/clientpositive/union_remove_2.q.out +++ b/ql/src/test/results/clientpositive/union_remove_2.q.out @@ -80,35 +80,6 @@ STAGE PLANS: alias: inputtbl1 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: key (type: string), UDFToLong(2) (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: inputtbl1 - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Select Operator expressions: key (type: string) outputColumnNames: key Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE @@ -131,20 +102,26 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 - Stage: Stage-3 + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -163,6 +140,25 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: inputtbl1 + 
Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: key (type: string), UDFToLong(2) (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + PREHOOK: query: insert overwrite table outputTbl1 SELECT * FROM ( diff --git a/ql/src/test/results/clientpositive/union_remove_23.q.out b/ql/src/test/results/clientpositive/union_remove_23.q.out index d4f1468..6f2cf20 100644 --- a/ql/src/test/results/clientpositive/union_remove_23.q.out +++ b/ql/src/test/results/clientpositive/union_remove_23.q.out @@ -66,68 +66,16 @@ FROM ( POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1, Stage-3 - Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2, Stage-4 + Stage-4 is a root stage STAGE PLANS: Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - alias: inputtbl1 - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col1 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -138,7 +86,7 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE TableScan - alias: a + alias: b Statistics: Num 
rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -157,24 +105,20 @@ STAGE PLANS: 1 outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -191,18 +135,62 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: inputtbl1 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: count(1) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic 
stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 PREHOOK: query: insert overwrite table outputTbl1 SELECT * @@ -226,8 +214,8 @@ FROM ( POSTHOOK: type: QUERY POSTHOOK: Input: default@inputtbl1 POSTHOOK: Output: default@outputtbl1 -POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)a.FieldSchema(name:key, type:string, comment:null), ] -POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)b.null, (inputtbl1)a.null, (inputtbl1)b.null, ] +POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)a.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)a.null, (inputtbl1)a.null, (inputtbl1)b.null, (inputtbl1)inputtbl1.null, ] PREHOOK: query: desc formatted outputTbl1 PREHOOK: type: DESCTABLE PREHOOK: Input: default@outputtbl1 diff --git a/ql/src/test/results/clientpositive/union_remove_24.q.out b/ql/src/test/results/clientpositive/union_remove_24.q.out index 5d1f4fa..c711ec8 100644 --- a/ql/src/test/results/clientpositive/union_remove_24.q.out +++ b/ql/src/test/results/clientpositive/union_remove_24.q.out @@ -94,7 +94,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: UDFToDouble(UDFToLong(_col0)) (type: double), _col1 (type: bigint) + expressions: UDFToDouble(_col0) (type: double), _col1 (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator @@ -146,7 +146,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: UDFToDouble(_col0) (type: double), _col1 (type: bigint) + expressions: UDFToDouble(UDFToLong(_col0)) (type: double), _col1 (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/union_remove_25.q.out b/ql/src/test/results/clientpositive/union_remove_25.q.out index 28940f7..6dc525b 100644 --- a/ql/src/test/results/clientpositive/union_remove_25.q.out +++ b/ql/src/test/results/clientpositive/union_remove_25.q.out @@ -111,18 +111,14 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 Stage: Stage-0 Move Operator @@ -165,18 +161,14 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 PREHOOK: query: insert overwrite table outputTbl1 partition(ds='2004') SELECT * diff --git a/ql/src/test/results/clientpositive/union_remove_3.q.out b/ql/src/test/results/clientpositive/union_remove_3.q.out index bda484b..e210461 100644 --- a/ql/src/test/results/clientpositive/union_remove_3.q.out +++ b/ql/src/test/results/clientpositive/union_remove_3.q.out @@ -78,7 +78,7 @@ STAGE PLANS: alias: inputtbl1 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: key (type: string), 3 (type: int) + expressions: key (type: string), 1 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Union @@ -99,7 +99,7 @@ STAGE PLANS: alias: inputtbl1 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: key (type: string), 1 (type: int) + expressions: key (type: string), 2 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Union @@ -120,7 +120,7 @@ STAGE PLANS: alias: inputtbl1 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: key (type: string), 2 (type: int) + expressions: key (type: string), 3 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Union diff --git a/ql/src/test/results/clientpositive/union_remove_4.q.out b/ql/src/test/results/clientpositive/union_remove_4.q.out index f139595..bf35c18 100644 --- a/ql/src/test/results/clientpositive/union_remove_4.q.out +++ b/ql/src/test/results/clientpositive/union_remove_4.q.out @@ -100,18 +100,14 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 Stage: Stage-6 Conditional Operator @@ -191,18 +187,14 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 PREHOOK: query: insert overwrite table outputTbl1 SELECT * diff --git a/ql/src/test/results/clientpositive/union_remove_5.q.out b/ql/src/test/results/clientpositive/union_remove_5.q.out index 6841f43..12e3907 100644 --- a/ql/src/test/results/clientpositive/union_remove_5.q.out +++ b/ql/src/test/results/clientpositive/union_remove_5.q.out @@ -87,17 +87,36 @@ STAGE PLANS: alias: inputtbl1 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: key (type: string), UDFToLong(2) (type: bigint) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: key Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 + Group By Operator + aggregations: count(1) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 Stage: Stage-6 Conditional Operator @@ -155,40 +174,17 @@ STAGE PLANS: alias: inputtbl1 Statistics: Num rows: 0 Data size: 30 Basic stats: 
diff --git a/ql/src/test/results/clientpositive/union_remove_5.q.out b/ql/src/test/results/clientpositive/union_remove_5.q.out
index 6841f43..12e3907 100644
--- a/ql/src/test/results/clientpositive/union_remove_5.q.out
+++ b/ql/src/test/results/clientpositive/union_remove_5.q.out
@@ -87,17 +87,36 @@ STAGE PLANS:
 alias: inputtbl1
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Select Operator
- expressions: key (type: string), UDFToLong(2) (type: bigint)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: key
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.outputtbl1
+ Group By Operator
+ aggregations: count(1)
+ keys: key (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
 Stage: Stage-6
 Conditional Operator
@@ -155,40 +174,17 @@ STAGE PLANS:
 alias: inputtbl1
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Select Operator
- expressions: key (type: string)
- outputColumnNames: key
+ expressions: key (type: string), UDFToLong(1) (type: bigint)
+ outputColumnNames: _col0, _col1
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
- Group By Operator
- aggregations: count(1)
- keys: key (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- value expressions: _col1 (type: bigint)
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- keys: KEY._col0 (type: string)
- mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: bigint)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.outputtbl1
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
 Stage: Stage-8
 Map Reduce
@@ -197,7 +193,7 @@ STAGE PLANS:
 alias: inputtbl1
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Select Operator
- expressions: key (type: string), UDFToLong(1) (type: bigint)
+ expressions: key (type: string), UDFToLong(2) (type: bigint)
 outputColumnNames: _col0, _col1
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 File Output Operator
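The union_remove_5 plan above now computes the count(1) branch with its own map/reduce group-by, while the two constant branches (UDFToLong(1), UDFToLong(2)) write straight to default.outputtbl1. The test statement itself is cut off by the diff context; a reconstruction of its probable shape, based on the branch expressions visible in the hunks (not a quote of the test file):

    INSERT OVERWRITE TABLE outputTbl1
    SELECT * FROM (
      SELECT key, count(1) AS `values` FROM inputTbl1 GROUP BY key
      UNION ALL
      SELECT key, 1 AS `values` FROM inputTbl1   -- surfaces as UDFToLong(1)
      UNION ALL
      SELECT key, 2 AS `values` FROM inputTbl1   -- surfaces as UDFToLong(2)
    ) a;

The column alias `values` is taken from the PREHOOK text of union_remove_6 below; backticks are added here only because it is a reserved word in later Hive grammars.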
diff --git a/ql/src/test/results/clientpositive/union_remove_6.q.out b/ql/src/test/results/clientpositive/union_remove_6.q.out
index 1b501af..ff23e2d 100644
--- a/ql/src/test/results/clientpositive/union_remove_6.q.out
+++ b/ql/src/test/results/clientpositive/union_remove_6.q.out
@@ -99,16 +99,12 @@ STAGE PLANS:
 mode: mergepartial
 outputColumnNames: _col0, _col1
 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: bigint)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 Stage: Stage-3
 Map Reduce
@@ -116,57 +112,41 @@ STAGE PLANS:
 TableScan
 Union
 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: bigint)
- outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.outputtbl1
- Select Operator
- expressions: _col0 (type: string), _col1 (type: bigint)
- outputColumnNames: _col0, _col1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+ File Output Operator
+ compressed: false
 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.outputtbl2
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl2
 TableScan
 Union
 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: bigint)
- outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.outputtbl1
- Select Operator
- expressions: _col0 (type: string), _col1 (type: bigint)
- outputColumnNames: _col0, _col1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+ File Output Operator
+ compressed: false
 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.outputtbl2
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl2
 Stage: Stage-0
 Move Operator
@@ -217,16 +197,12 @@ STAGE PLANS:
 mode: mergepartial
 outputColumnNames: _col0, _col1
 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: bigint)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 PREHOOK: query: FROM (
 SELECT key, count(1) as values from inputTbl1 group by key
diff --git a/ql/src/test/results/clientpositive/union_remove_7.q.out b/ql/src/test/results/clientpositive/union_remove_7.q.out
index 679b4d7..9d2caaa 100644
--- a/ql/src/test/results/clientpositive/union_remove_7.q.out
+++ b/ql/src/test/results/clientpositive/union_remove_7.q.out
@@ -99,18 +99,14 @@ STAGE PLANS:
 mode: mergepartial
 outputColumnNames: _col0, _col1
 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: bigint)
- outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- table:
- input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
- name: default.outputtbl1
+ table:
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ name: default.outputtbl1
 Stage: Stage-0
 Move Operator
@@ -151,18 +147,14 @@ STAGE PLANS:
 mode: mergepartial
 outputColumnNames: _col0, _col1
 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: bigint)
- outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- table:
- input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
- name: default.outputtbl1
+ table:
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ name: default.outputtbl1
 PREHOOK: query: insert overwrite table outputTbl1
 SELECT *
diff --git a/ql/src/test/results/clientpositive/union_remove_8.q.out b/ql/src/test/results/clientpositive/union_remove_8.q.out
index 529105b..ec9349a 100644
--- a/ql/src/test/results/clientpositive/union_remove_8.q.out
+++ b/ql/src/test/results/clientpositive/union_remove_8.q.out
@@ -84,35 +84,6 @@ STAGE PLANS:
 alias: inputtbl1
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Select Operator
- expressions: key (type: string), UDFToLong(2) (type: bigint)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
- table:
- input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
- name: default.outputtbl1
-
- Stage: Stage-0
- Move Operator
- tables:
- replace: true
- table:
- input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
- name: default.outputtbl1
-
- Stage: Stage-2
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: inputtbl1
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
- Select Operator
 expressions: key (type: string)
 outputColumnNames: key
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
@@ -135,20 +106,26 @@ STAGE PLANS:
 mode: mergepartial
 outputColumnNames: _col0, _col1
 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: bigint)
- outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- table:
- input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
- name: default.outputtbl1
+ table:
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ name: default.outputtbl1
- Stage: Stage-3
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ name: default.outputtbl1
+
+ Stage: Stage-2
 Map Reduce
 Map Operator Tree:
 TableScan
@@ -167,6 +144,25 @@ STAGE PLANS:
 serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
 name: default.outputtbl1

+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: inputtbl1
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: key (type: string), UDFToLong(2) (type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ name: default.outputtbl1
+
 PREHOOK: query: insert overwrite table outputTbl1
 SELECT *
 FROM (
diff --git a/ql/src/test/results/clientpositive/union_remove_9.q.out b/ql/src/test/results/clientpositive/union_remove_9.q.out
index 9b4168e..abdb778 100644
--- a/ql/src/test/results/clientpositive/union_remove_9.q.out
+++ b/ql/src/test/results/clientpositive/union_remove_9.q.out
@@ -90,44 +90,36 @@ STAGE PLANS:
 alias: inputtbl1
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Select Operator
- expressions: key (type: string), 1 (type: int)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
- Union
- Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE
- table:
- input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
- name: default.outputtbl1
- TableScan
- alias: inputtbl1
- Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
- Select Operator
- expressions: key (type: string), 2 (type: int)
- outputColumnNames: _col0, _col1
+ expressions: key (type: string)
+ outputColumnNames: key
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
- Union
- Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE
- table:
- input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
- name: default.outputtbl1
+ Group By Operator
+ aggregations: count(1)
+ keys: key (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ name: default.outputtbl1
 Stage: Stage-6
 Conditional Operator
@@ -175,40 +167,44 @@ STAGE PLANS:
 alias: inputtbl1
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
 Select Operator
- expressions: key (type: string)
- outputColumnNames: key
+ expressions: key (type: string), 1 (type: int)
+ outputColumnNames: _col0, _col1
 Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
- Group By Operator
- aggregations: count(1)
- keys: key (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- value expressions: _col1 (type: bigint)
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- keys: KEY._col0 (type: string)
- mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: bigint)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- table:
- input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
- name: default.outputtbl1
+ Union
+ Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ name: default.outputtbl1
+ TableScan
+ alias: inputtbl1
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: key (type: string), 2 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Union
+ Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ name: default.outputtbl1
 PREHOOK: query: insert overwrite table outputTbl1
 SELECT * FROM
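One detail worth noting in the union_remove_9 hunks above: the literal branches produce an int (1 or 2) while the aggregate branch produces a bigint count, so the plan wraps the union output in UDFToLong(_col1) to widen the column. A minimal stand-alone illustration (not the test's literal query):

    -- count(1) is BIGINT while the literal 1 is INT; Hive widens the union's
    -- second column to BIGINT, which shows up as UDFToLong(_col1) in the plan.
    SELECT key, 1 AS v FROM inputTbl1
    UNION ALL
    SELECT key, count(1) AS v FROM inputTbl1 GROUP BY key;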
diff --git a/ql/src/test/results/clientpositive/union_top_level.q.out b/ql/src/test/results/clientpositive/union_top_level.q.out
index 2be60f0..dd68775 100644
--- a/ql/src/test/results/clientpositive/union_top_level.q.out
+++ b/ql/src/test/results/clientpositive/union_top_level.q.out
@@ -801,48 +801,36 @@ STAGE PLANS:
 TableScan
 Union
 Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: int)
- outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
 Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.union_top
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.union_top
 TableScan
 Union
 Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: int)
- outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
 Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.union_top
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.union_top
 TableScan
 Union
 Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: int)
- outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
 Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.union_top
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.union_top
 Stage: Stage-8
 Conditional Operator
@@ -1074,48 +1062,36 @@ STAGE PLANS:
 TableScan
 Union
 Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: int)
- outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
 Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.union_top
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.union_top
 TableScan
 Union
 Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: int)
- outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
 Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.union_top
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.union_top
 TableScan
 Union
 Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: int)
- outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
 Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.union_top
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.union_top
 Stage: Stage-8
 Conditional Operator
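In union_top_level the union feeds default.union_top at the top level, so after this change each TableScan branch ends directly in a File Output Operator. The three Union branches with Num rows: 9 (three branches, _col0 string and _col1 int) are consistent with a statement of roughly this shape; the branch predicates and per-branch limits below are assumed, not shown in these hunks:

    INSERT OVERWRITE TABLE union_top
    SELECT * FROM (
      SELECT * FROM (SELECT key, 1 AS value FROM src LIMIT 3) a
      UNION ALL
      SELECT * FROM (SELECT key, 2 AS value FROM src LIMIT 3) b
      UNION ALL
      SELECT * FROM (SELECT key, 3 AS value FROM src LIMIT 3) c
    ) t;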
diff --git a/ql/src/test/results/clientpositive/union_view.q.out b/ql/src/test/results/clientpositive/union_view.q.out
index ce987c3..5e69ec2 100644
--- a/ql/src/test/results/clientpositive/union_view.q.out
+++ b/ql/src/test/results/clientpositive/union_view.q.out
@@ -651,7 +651,7 @@ STAGE PLANS:
 Map Reduce
 Map Operator Tree:
 TableScan
- alias: default.default__src_union_2_src_union_2_key_idx__
+ alias: default.default__src_union_1_src_union_1_key_idx__
 filterExpr: (key = 86) (type: boolean)
 Filter Operator
 predicate: (key = 86) (type: boolean)
@@ -675,16 +675,16 @@ STAGE PLANS:
 Map Reduce
 Map Operator Tree:
 TableScan
- alias: src_union_2
+ alias: src_union_1
 filterExpr: ((key = 86) and ds is not null) (type: boolean)
- Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
 Filter Operator
 predicate: (key = 86) (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
 Select Operator
 expressions: value (type: string), ds (type: string)
 outputColumnNames: _col1, _col2
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
 Union
 Statistics: Num rows: 1250 Data size: 13280 Basic stats: COMPLETE Column stats: NONE
 Select Operator
@@ -697,16 +697,16 @@ STAGE PLANS:
 Statistics: Num rows: 1250 Data size: 13280 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col1 (type: string)
 TableScan
- alias: src_union_1
+ alias: src_union_2
 filterExpr: ((key = 86) and ds is not null) (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
 Filter Operator
 predicate: (key = 86) (type: boolean)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
 Select Operator
 expressions: value (type: string), ds (type: string)
 outputColumnNames: _col1, _col2
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
 Union
 Statistics: Num rows: 1250 Data size: 13280 Basic stats: COMPLETE Column stats: NONE
 Select Operator
@@ -757,7 +757,7 @@ STAGE PLANS:
 Map Reduce
 Map Operator Tree:
 TableScan
- alias: default.default__src_union_1_src_union_1_key_idx__
+ alias: default.default__src_union_2_src_union_2_key_idx__
 filterExpr: (key = 86) (type: boolean)
 Filter Operator
 predicate: (key = 86) (type: boolean)
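The union_view hunks only swap which branch of the union is planned first. The aliases of the form default.default__src_union_1_src_union_1_key_idx__ are Hive's auto-generated index tables (default__<table>_<index>__), consulted here to answer the key = 86 filter. For reference, an index with that generated name would have been declared with the pre-3.0 index DDL (a sketch; the index feature was removed in Hive 3.0):

    -- Creates the backing table default.default__src_union_1_src_union_1_key_idx__
    CREATE INDEX src_union_1_key_idx
    ON TABLE src_union_1 (key)
    AS 'COMPACT' WITH DEFERRED REBUILD;

    -- Populate the index before the optimizer can use it
    ALTER INDEX src_union_1_key_idx ON src_union_1 REBUILD;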
diff --git a/ql/src/test/results/clientpositive/vector_cast_constant.q.out b/ql/src/test/results/clientpositive/vector_cast_constant.q.out
index 13959a0..8817dd7 100644
--- a/ql/src/test/results/clientpositive/vector_cast_constant.q.out
+++ b/ql/src/test/results/clientpositive/vector_cast_constant.q.out
@@ -146,17 +146,13 @@ STAGE PLANS:
 keys: KEY._col0 (type: int)
 mode: mergepartial
 outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: int), _col1 (type: double), _col2 (type: double), _col3 (type: decimal(14,4))
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 Stage: Stage-2
 Map Reduce
@@ -165,13 +161,13 @@ STAGE PLANS:
 Reduce Output Operator
 key expressions: _col0 (type: int)
 sort order: +
- Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: decimal(14,4))
 Reduce Operator Tree:
 Select Operator
 expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: decimal(14,4))
 outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
 Limit
 Number of rows: 10
 Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE
diff --git a/ql/src/test/results/clientpositive/vector_char_2.q.out b/ql/src/test/results/clientpositive/vector_char_2.q.out
index 7d1512c..117967a 100644
--- a/ql/src/test/results/clientpositive/vector_char_2.q.out
+++ b/ql/src/test/results/clientpositive/vector_char_2.q.out
@@ -94,17 +94,13 @@ STAGE PLANS:
 keys: KEY._col0 (type: char(20))
 mode: mergepartial
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: char(20)), _col1 (type: bigint), _col2 (type: bigint)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 Stage: Stage-2
 Map Reduce
@@ -113,13 +109,13 @@ STAGE PLANS:
 Reduce Output Operator
 key expressions: _col0 (type: char(20))
 sort order: +
- Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col1 (type: bigint), _col2 (type: bigint)
 Reduce Operator Tree:
 Select Operator
 expressions: KEY.reducesinkkey0 (type: char(20)), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint)
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE
 Limit
 Number of rows: 5
 Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE
@@ -228,17 +224,13 @@ STAGE PLANS:
 keys: KEY._col0 (type: char(20))
 mode: mergepartial
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: char(20)), _col1 (type: bigint), _col2 (type: bigint)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 Stage: Stage-2
 Map Reduce
@@ -247,13 +239,13 @@ STAGE PLANS:
 Reduce Output Operator
 key expressions: _col0 (type: char(20))
 sort order: -
- Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col1 (type: bigint), _col2 (type: bigint)
 Reduce Operator Tree:
 Select Operator
 expressions: KEY.reducesinkkey0 (type: char(20)), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint)
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE
 Limit
 Number of rows: 5
 Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE
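Both vector_char_2 plans group on a char(20) key, carry two bigint aggregates, and sort the result (ascending in the first plan, descending in the second) under LIMIT 5. A query of that shape, with the fixture table and column names assumed rather than quoted from the test:

    SET hive.vectorized.execution.enabled = true;

    SELECT value, sum(cast(key AS int)), count(*)  -- _col1/_col2: two bigints
    FROM char_2                                    -- assumed: value is char(20)
    GROUP BY value
    ORDER BY value ASC
    LIMIT 5;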
b/ql/src/test/results/clientpositive/vector_decimal_aggregate.q.out index 787d5dd..cf975d1 100644 --- a/ql/src/test/results/clientpositive/vector_decimal_aggregate.q.out +++ b/ql/src/test/results/clientpositive/vector_decimal_aggregate.q.out @@ -46,22 +46,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_vgby - Statistics: Num rows: 561 Data size: 128041 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int), cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14)) outputColumnNames: cint, cdecimal1, cdecimal2 - Statistics: Num rows: 561 Data size: 128041 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(cdecimal1), max(cdecimal1), min(cdecimal1), sum(cdecimal1), count(cdecimal2), max(cdecimal2), min(cdecimal2), sum(cdecimal2), count() keys: cint (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 561 Data size: 128041 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 561 Data size: 128041 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: bigint), _col6 (type: decimal(23,14)), _col7 (type: decimal(23,14)), _col8 (type: decimal(33,14)), _col9 (type: bigint) Execution mode: vectorized Reduce Operator Tree: @@ -70,17 +70,17 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 280 Data size: 63906 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col9 > 1) (type: boolean) - Statistics: Num rows: 93 Data size: 21225 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2048 Data size: 360843 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: bigint), _col6 (type: decimal(23,14)), _col7 (type: decimal(23,14)), _col8 (type: decimal(33,14)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 93 Data size: 21225 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2048 Data size: 360843 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 93 Data size: 21225 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2048 Data size: 360843 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -144,22 +144,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_vgby - Statistics: Num rows: 561 Data size: 128041 Basic stats: COMPLETE Column stats: NONE + Statistics: Num 
+ Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE
 Select Operator
 expressions: cint (type: int), cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14))
 outputColumnNames: cint, cdecimal1, cdecimal2
- Statistics: Num rows: 561 Data size: 128041 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
 aggregations: count(cdecimal1), max(cdecimal1), min(cdecimal1), sum(cdecimal1), avg(cdecimal1), stddev_pop(cdecimal1), stddev_samp(cdecimal1), count(cdecimal2), max(cdecimal2), min(cdecimal2), sum(cdecimal2), avg(cdecimal2), stddev_pop(cdecimal2), stddev_samp(cdecimal2), count()
 keys: cint (type: int)
 mode: hash
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15
- Statistics: Num rows: 561 Data size: 128041 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE
 Reduce Output Operator
 key expressions: _col0 (type: int)
 sort order: +
 Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 561 Data size: 128041 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: bigint), _col9 (type: decimal(23,14)), _col10 (type: decimal(23,14)), _col11 (type: decimal(33,14)), _col12 (type: struct), _col13 (type: struct), _col14 (type: struct), _col15 (type: bigint)
 Execution mode: vectorized
 Reduce Operator Tree:
@@ -168,17 +168,17 @@ STAGE PLANS:
 keys: KEY._col0 (type: int)
 mode: mergepartial
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15
- Statistics: Num rows: 280 Data size: 63906 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE
 Filter Operator
 predicate: (_col15 > 1) (type: boolean)
- Statistics: Num rows: 93 Data size: 21225 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2048 Data size: 360843 Basic stats: COMPLETE Column stats: NONE
 Select Operator
 expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: decimal(24,14)), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: decimal(23,14)), _col10 (type: decimal(23,14)), _col11 (type: decimal(33,14)), _col12 (type: decimal(27,18)), _col13 (type: double), _col14 (type: double)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
- Statistics: Num rows: 93 Data size: 21225 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2048 Data size: 360843 Basic stats: COMPLETE Column stats: NONE
 File Output Operator
 compressed: false
- Statistics: Num rows: 93 Data size: 21225 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2048 Data size: 360843 Basic stats: COMPLETE Column stats: NONE
 table:
 input format: org.apache.hadoop.mapred.TextInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
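The vector_decimal_aggregate hunks spell the query shape out directly: per-cint groups with count/max/min/sum over cdecimal1 and cdecimal2 (plus avg/stddev_pop/stddev_samp in the second plan), kept only when the group has more than one row (the (_col9 > 1) and (_col15 > 1) Filter Operators). A matching query, with table and column names taken from the hunks and the HAVING clause mirroring the filter:

    SELECT cint,
           count(cdecimal1), max(cdecimal1), min(cdecimal1), sum(cdecimal1),
           count(cdecimal2), max(cdecimal2), min(cdecimal2), sum(cdecimal2),
           count(*)
    FROM decimal_vgby
    GROUP BY cint
    HAVING count(*) > 1;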
diff --git a/ql/src/test/results/clientpositive/vector_elt.q.out b/ql/src/test/results/clientpositive/vector_elt.q.out
index d3cc179..e84c0a7 100644
--- a/ql/src/test/results/clientpositive/vector_elt.q.out
+++ b/ql/src/test/results/clientpositive/vector_elt.q.out
@@ -91,37 +91,24 @@ SELECT elt(2, 'abc', 'defg'),
 FROM alltypesorc LIMIT 1
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage

 STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: alltypesorc
- Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: 'defg' (type: string), 'cc' (type: string), 'abc' (type: string), '2' (type: string), '12345' (type: string), '123456789012' (type: string), '1.25' (type: string), '16.0' (type: string), null (type: void), null (type: void)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
- Statistics: Num rows: 12288 Data size: 8687616 Basic stats: COMPLETE Column stats: COMPLETE
- Limit
- Number of rows: 1
- Statistics: Num rows: 1 Data size: 707 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 707 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Execution mode: vectorized
-
 Stage: Stage-0
 Fetch Operator
 limit: 1
 Processor Tree:
- ListSink
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: 'defg' (type: string), 'cc' (type: string), 'abc' (type: string), '2' (type: string), '12345' (type: string), '123456789012' (type: string), '1.25' (type: string), '16.0' (type: string), null (type: void), null (type: void)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+ Statistics: Num rows: 12288 Data size: 8687616 Basic stats: COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 1
+ Statistics: Num rows: 1 Data size: 707 Basic stats: COMPLETE Column stats: COMPLETE
+ ListSink
 PREHOOK: query: SELECT elt(2, 'abc', 'defg'),
 elt(3, 'aa', 'bb', 'cc', 'dd', 'ee', 'ff', 'gg'),
diff --git a/ql/src/test/results/clientpositive/vector_groupby_reduce.q.out b/ql/src/test/results/clientpositive/vector_groupby_reduce.q.out
index 438d6b6..2ae10ef 100644
--- a/ql/src/test/results/clientpositive/vector_groupby_reduce.q.out
+++ b/ql/src/test/results/clientpositive/vector_groupby_reduce.q.out
@@ -374,30 +374,26 @@ STAGE PLANS:
 mode: mergepartial
 outputColumnNames: _col0
 Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: min(_col0)
- keys: _col0 (type: int)
- mode: complete
- outputColumnNames: _col0, _col1
+ Group By Operator
+ aggregations: min(_col0)
+ keys: _col0 (type: int)
+ mode: complete
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: int)
+ outputColumnNames: _col0
 Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col1 (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
- Limit
- Number of rows: 20
+ Limit
+ Number of rows: 20
+ Statistics: Num rows: 20 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
 Statistics: Num rows: 20 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 20 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 Stage: Stage-0
 Fetch Operator
diff --git a/ql/src/test/results/clientpositive/vector_left_outer_join.q.out b/ql/src/test/results/clientpositive/vector_left_outer_join.q.out
index d1c5855..e286d3f 100644
--- a/ql/src/test/results/clientpositive/vector_left_outer_join.q.out
+++ b/ql/src/test/results/clientpositive/vector_left_outer_join.q.out
@@ -17,12 +17,12 @@ left outer join alltypesorc hd
 ) t1
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
- Stage-7 is a root stage
- Stage-2 depends on stages: Stage-7
- Stage-0 depends on stages: Stage-2
+ Stage-8 is a root stage
+ Stage-3 depends on stages: Stage-8
+ Stage-0 depends on stages: Stage-3

 STAGE PLANS:
- Stage: Stage-7
+ Stage: Stage-8
 Map Reduce Local Work
 Alias -> Map Local Tables:
 t1:cd
@@ -55,7 +55,7 @@ STAGE PLANS:
 0 _col0 (type: tinyint)
 1 ctinyint (type: tinyint)

- Stage: Stage-2
+ Stage: Stage-3
 Map Reduce
 Map Operator Tree:
 TableScan
@@ -82,17 +82,15 @@ STAGE PLANS:
 0 _col0 (type: tinyint)
 1 ctinyint (type: tinyint)
 Statistics: Num rows: 14867 Data size: 456456 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- Statistics: Num rows: 14867 Data size: 456456 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
+ value expressions: _col0 (type: bigint)
 Local Work:
 Map Reduce Local Work
 Execution mode: vectorized
diff --git a/ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out b/ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out
index 03c8afa..4c05f6c 100644
--- a/ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out
+++ b/ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out
@@ -23,13 +23,13 @@ where li.l_linenumber = 1 and
 li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR')
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
- Stage-3 is a root stage
- Stage-7 depends on stages: Stage-3
- Stage-5 depends on stages: Stage-7
- Stage-0 depends on stages: Stage-5
+ Stage-1 is a root stage
+ Stage-8 depends on stages: Stage-1
+ Stage-6 depends on stages: Stage-8
+ Stage-0 depends on stages: Stage-6

 STAGE PLANS:
- Stage: Stage-3
+ Stage: Stage-1
 Map Reduce
 Map Operator Tree:
 TableScan
@@ -38,38 +38,30 @@ STAGE PLANS:
 Filter Operator
 predicate: l_partkey is not null (type: boolean)
 Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: l_partkey (type: int)
- outputColumnNames: l_partkey
+ Group By Operator
+ keys: l_partkey (type: int)
+ mode: hash
+ outputColumnNames: _col0
 Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: l_partkey (type: int)
- mode: hash
- outputColumnNames: _col0
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
 Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
 Reduce Operator Tree:
 Group By Operator
 keys: KEY._col0 (type: int)
 mode: mergepartial
 outputColumnNames: _col0
- Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

- Stage: Stage-7
+ Stage: Stage-8
 Map Reduce Local Work
 Alias -> Map Local Tables:
 li
@@ -117,7 +109,7 @@ STAGE PLANS:
 0 _col1 (type: int)
 1 _col0 (type: int)

- Stage: Stage-5
+ Stage: Stage-6
 Map Reduce
 Map Operator Tree:
 TableScan
@@ -131,7 +123,7 @@ STAGE PLANS:
 0 _col0 (type: int)
 1 l_partkey (type: int)
 outputColumnNames: _col0, _col1, _col3
- Statistics: Num rows: 27 Data size: 3298 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE
 Map Join Operator
 condition map:
 Left Semi Join 0 to 1
@@ -142,14 +134,14 @@ STAGE PLANS:
 0 _col1 (type: int)
 1 _col0 (type: int)
 outputColumnNames: _col0, _col3
- Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE
 Select Operator
 expressions: _col0 (type: int), _col3 (type: int)
 outputColumnNames: _col0, _col1
- Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE
 File Output Operator
 compressed: false
- Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE
 table:
 input format: org.apache.hadoop.mapred.TextInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -202,13 +194,13 @@ where li.l_linenumber = 1 and
 li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and
 l_linenumber = li.l_linenumber)
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
- Stage-3 is a root stage
- Stage-7 depends on stages: Stage-3
- Stage-5 depends on stages: Stage-7
- Stage-0 depends on stages: Stage-5
+ Stage-1 is a root stage
+ Stage-8 depends on stages: Stage-1
+ Stage-6 depends on stages: Stage-8
+ Stage-0 depends on stages: Stage-6

 STAGE PLANS:
- Stage: Stage-3
+ Stage: Stage-1
 Map Reduce
 Map Operator Tree:
 TableScan
@@ -217,38 +209,30 @@ STAGE PLANS:
 Filter Operator
 predicate: l_partkey is not null (type: boolean)
 Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: l_partkey (type: int)
- outputColumnNames: l_partkey
+ Group By Operator
+ keys: l_partkey (type: int)
+ mode: hash
+ outputColumnNames: _col0
 Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: l_partkey (type: int)
- mode: hash
- outputColumnNames: _col0
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
 Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
 Reduce Operator Tree:
 Group By Operator
 keys: KEY._col0 (type: int)
 mode: mergepartial
 outputColumnNames: _col0
- Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

- Stage: Stage-7
+ Stage: Stage-8
 Map Reduce Local Work
 Alias -> Map Local Tables:
 li
@@ -296,7 +280,7 @@ STAGE PLANS:
 0 _col1 (type: int), 1 (type: int)
 1 _col0 (type: int), _col1 (type: int)

- Stage: Stage-5
+ Stage: Stage-6
 Map Reduce
 Map Operator Tree:
 TableScan
@@ -310,7 +294,7 @@ STAGE PLANS:
 0 _col0 (type: int)
 1 l_partkey (type: int)
 outputColumnNames: _col0, _col1, _col3
- Statistics: Num rows: 27 Data size: 3298 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE
 Map Join Operator
 condition map:
 Left Semi Join 0 to 1
@@ -321,14 +305,14 @@ STAGE PLANS:
 0 _col1 (type: int), 1 (type: int)
 1 _col0 (type: int), _col1 (type: int)
 outputColumnNames: _col0, _col3
- Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE
 Select Operator
 expressions: _col0 (type: int), _col3 (type: int)
 outputColumnNames: _col0, _col1
- Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE
 File Output Operator
 compressed: false
- Statistics: Num rows: 29 Data size: 3627 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE
 table:
 input format: org.apache.hadoop.mapred.TextInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
diff --git a/ql/src/test/results/clientpositive/vector_orderby_5.q.out b/ql/src/test/results/clientpositive/vector_orderby_5.q.out
index aa43500..3e24a32 100644
--- a/ql/src/test/results/clientpositive/vector_orderby_5.q.out
+++ b/ql/src/test/results/clientpositive/vector_orderby_5.q.out
@@ -142,17 +142,13 @@ STAGE PLANS:
 keys: KEY._col0 (type: boolean)
 mode: mergepartial
 outputColumnNames: _col0, _col1
- Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: boolean), _col1 (type: bigint)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 Stage: Stage-2
 Map Reduce
@@ -161,16 +157,16 @@ STAGE PLANS:
 Reduce Output Operator
 key expressions: _col0 (type: boolean)
 sort order: -
- Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col1 (type: bigint)
 Reduce Operator Tree:
 Select Operator
 expressions: KEY.reducesinkkey0 (type: boolean), VALUE._col0 (type: bigint)
 outputColumnNames: _col0, _col1
- Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
 File Output Operator
 compressed: false
- Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
 table:
 input format: org.apache.hadoop.mapred.TextInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
diff --git a/ql/src/test/results/clientpositive/vector_reduce_groupby_decimal.q.out b/ql/src/test/results/clientpositive/vector_reduce_groupby_decimal.q.out
index 527a117..084d3d5 100644
--- a/ql/src/test/results/clientpositive/vector_reduce_groupby_decimal.q.out
+++ b/ql/src/test/results/clientpositive/vector_reduce_groupby_decimal.q.out
@@ -39,22 +39,18 @@ STAGE PLANS:
 Filter Operator
 predicate: (cdecimal1 is not null and cdecimal2 is not null) (type: boolean)
 Statistics: Num rows: 1526 Data size: 360136 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: cint (type: int), cdouble (type: double), cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14))
- outputColumnNames: cint, cdouble, cdecimal1, cdecimal2
+ Group By Operator
+ aggregations: min(cdecimal1)
+ keys: cint (type: int), cdouble (type: double), cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14))
decimal(23,14)) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 1526 Data size: 360136 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(cdecimal1) - keys: cint (type: int), cdouble (type: double), cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14)) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: double), _col2 (type: decimal(20,10)), _col3 (type: decimal(23,14)) + sort order: ++++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: double), _col2 (type: decimal(20,10)), _col3 (type: decimal(23,14)) Statistics: Num rows: 1526 Data size: 360136 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: double), _col2 (type: decimal(20,10)), _col3 (type: decimal(23,14)) - sort order: ++++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: double), _col2 (type: decimal(20,10)), _col3 (type: decimal(23,14)) - Statistics: Num rows: 1526 Data size: 360136 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: decimal(20,10)) + value expressions: _col4 (type: decimal(20,10)) Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -62,17 +58,13 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: double), KEY._col2 (type: decimal(20,10)), KEY._col3 (type: decimal(23,14)) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 763 Data size: 180068 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: double), _col2 (type: decimal(20,10)), _col3 (type: decimal(23,14)), _col4 (type: decimal(20,10)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 763 Data size: 180068 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 1526 Data size: 360136 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -81,13 +73,13 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: double), _col2 (type: decimal(20,10)), _col3 (type: decimal(23,14)) sort order: ++++ - Statistics: Num rows: 763 Data size: 180068 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1526 Data size: 360136 Basic stats: COMPLETE Column stats: NONE value expressions: _col4 (type: decimal(20,10)) Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: decimal(20,10)), KEY.reducesinkkey3 (type: decimal(23,14)), VALUE._col0 (type: decimal(20,10)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 763 Data size: 180068 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1526 Data size: 360136 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 50 Statistics: Num rows: 50 Data size: 11800 Basic stats: COMPLETE 
Column stats: NONE diff --git a/ql/src/test/results/clientpositive/vectorization_0.q.out b/ql/src/test/results/clientpositive/vectorization_0.q.out index d50899e..6c04bfe 100644 --- a/ql/src/test/results/clientpositive/vectorization_0.q.out +++ b/ql/src/test/results/clientpositive/vectorization_0.q.out @@ -48,16 +48,12 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: tinyint), _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -148,16 +144,12 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -257,16 +249,12 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -373,16 +361,12 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) - 
outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -473,16 +457,12 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -582,16 +562,12 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -698,16 +674,12 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: float), _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -798,16 +770,12 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -907,16 +875,12 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce diff --git a/ql/src/test/results/clientpositive/vectorization_limit.q.out b/ql/src/test/results/clientpositive/vectorization_limit.q.out index 5a31b9e..8c47ece 100644 --- a/ql/src/test/results/clientpositive/vectorization_limit.q.out +++ b/ql/src/test/results/clientpositive/vectorization_limit.q.out @@ -485,17 +485,13 @@ STAGE PLANS: keys: KEY._col0 (type: double) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 3072 Data size: 94309 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: double), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3072 Data size: 94309 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -504,13 +500,13 @@ STAGE PLANS: Reduce Output Operator key expressions: _col1 (type: bigint), _col0 (type: double) sort order: ++ - Statistics: Num rows: 3072 
Data size: 94309 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: double), KEY.reducesinkkey0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 3072 Data size: 94309 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 20 Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/vectorized_context.q.out b/ql/src/test/results/clientpositive/vectorized_context.q.out index fe24fd7..70f87f5 100644 --- a/ql/src/test/results/clientpositive/vectorized_context.q.out +++ b/ql/src/test/results/clientpositive/vectorized_context.q.out @@ -97,12 +97,12 @@ JOIN household_demographics ON store_sales.ss_hdemo_sk = household_demographics. limit 100 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-4 depends on stages: Stage-6 - Stage-0 depends on stages: Stage-4 + Stage-7 is a root stage + Stage-5 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-6 + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: household_demographics @@ -141,7 +141,7 @@ STAGE PLANS: 0 ss_store_sk (type: int) 1 s_store_sk (type: int) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan diff --git a/ql/src/test/results/clientpositive/vectorized_date_funcs.q.out b/ql/src/test/results/clientpositive/vectorized_date_funcs.q.out index bf4230f..2d43528 100644 --- a/ql/src/test/results/clientpositive/vectorized_date_funcs.q.out +++ b/ql/src/test/results/clientpositive/vectorized_date_funcs.q.out @@ -1020,16 +1020,12 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: date), _col1 (type: date), _col2 (type: bigint), _col3 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce diff --git a/ql/src/test/results/clientpositive/vectorized_mapjoin.q.out b/ql/src/test/results/clientpositive/vectorized_mapjoin.q.out index da1a838..d574b20 100644 --- a/ql/src/test/results/clientpositive/vectorized_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/vectorized_mapjoin.q.out @@ -54,19 +54,15 @@ STAGE PLANS: 1 cint (type: int) outputColumnNames: _col2, _col17 Statistics: Num rows: 6758 Data size: 207479 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: int), _col17 (type: int) - outputColumnNames: _col2, _col17 - Statistics: Num rows: 6758 Data size: 207479 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(_col2), max(_col17), min(_col2), 
avg((_col2 + _col17)) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 + Group By Operator + aggregations: count(_col2), max(_col17), min(_col2), avg((_col2 + _col17)) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: struct) + value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: struct) Local Work: Map Reduce Local Work Execution mode: vectorized diff --git a/ql/src/test/results/clientpositive/vectorized_nested_mapjoin.q.out b/ql/src/test/results/clientpositive/vectorized_nested_mapjoin.q.out index ed7e595..97dc5b8 100644 --- a/ql/src/test/results/clientpositive/vectorized_nested_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/vectorized_nested_mapjoin.q.out @@ -3,12 +3,12 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-7 is a root stage - Stage-2 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-2 + Stage-8 is a root stage + Stage-3 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-7 + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: t1:v1 @@ -47,7 +47,7 @@ STAGE PLANS: 0 _col0 (type: smallint) 1 csmallint (type: smallint) - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -85,19 +85,15 @@ STAGE PLANS: 1 csmallint (type: smallint) outputColumnNames: _col1 Statistics: Num rows: 6758 Data size: 207479 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: double) - outputColumnNames: _col1 - Statistics: Num rows: 6758 Data size: 207479 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col1) - mode: hash - outputColumnNames: _col0 + Group By Operator + aggregations: sum(_col1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: double) + value expressions: _col0 (type: double) Local Work: Map Reduce Local Work Execution mode: vectorized diff --git a/ql/src/test/results/clientpositive/vectorized_ptf.q.out b/ql/src/test/results/clientpositive/vectorized_ptf.q.out index 8462c74..5acda35 100644 --- a/ql/src/test/results/clientpositive/vectorized_ptf.q.out +++ b/ql/src/test/results/clientpositive/vectorized_ptf.q.out @@ -647,27 +647,23 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: 
double), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8 - columns.types int,string,string,string,string,int,string,double,string - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8 + columns.types int,string,string,string,string,int,string,double,string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Map Reduce @@ -2174,12 +2170,12 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -2275,10 +2271,20 @@ STAGE PLANS: GatherStats: false MultiFileSpray: false - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan + GatherStats: false + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + auto parallelism: false + TableScan alias: p1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE GatherStats: false @@ -2293,16 +2299,6 @@ STAGE PLANS: Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE tag: 1 auto parallelism: false - TableScan - GatherStats: false - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4687,13 +4683,13 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 STAGE PLANS: - 
Stage: Stage-3 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -4789,10 +4785,20 @@ STAGE PLANS: GatherStats: false MultiFileSpray: false - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan + GatherStats: false + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + auto parallelism: false + TableScan alias: p1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE GatherStats: false @@ -4807,22 +4813,12 @@ STAGE PLANS: Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE tag: 1 auto parallelism: false - TableScan - GatherStats: false - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: -mr-10003 + base file name: -mr-10002 input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: @@ -4915,7 +4911,7 @@ STAGE PLANS: GatherStats: false MultiFileSpray: false - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -4933,7 +4929,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: -mr-10002 + base file name: -mr-10003 input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: @@ -5494,28 +5490,24 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types string,string,double - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types string,string,double + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe 
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Map Reduce @@ -5526,7 +5518,7 @@ STAGE PLANS: key expressions: _col0 (type: string), _col0 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double) auto parallelism: false @@ -5558,9 +5550,9 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 @@ -5588,7 +5580,7 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE tag: -1 value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double) auto parallelism: false @@ -5620,19 +5612,19 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), _wcol0 (type: double) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git a/ql/src/test/results/clientpositive/vectorized_shufflejoin.q.out b/ql/src/test/results/clientpositive/vectorized_shufflejoin.q.out index f621927..ab27e0b 100644 --- a/ql/src/test/results/clientpositive/vectorized_shufflejoin.q.out +++ b/ql/src/test/results/clientpositive/vectorized_shufflejoin.q.out @@ -16,7 +16,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: t2 + alias: t1 Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: cint is not null (type: boolean) @@ -27,7 +27,7 @@ STAGE PLANS: Map-reduce partition columns: cint (type: int) Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE TableScan - alias: t1 + alias: t2 Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Filter Operator 
predicate: cint is not null (type: boolean) @@ -47,21 +47,17 @@ 1 {KEY.reducesinkkey0} outputColumnNames: _col2, _col17 Statistics: Num rows: 6758 Data size: 207479 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: int), _col17 (type: int) - outputColumnNames: _col2, _col17 - Statistics: Num rows: 6758 Data size: 207479 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(_col2), max(_col17), min(_col2), avg((_col2 + _col17)) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Group By Operator + aggregations: count(_col2), max(_col17), min(_col2), avg((_col2 + _col17)) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce diff --git a/serde/pom.xml b/serde/pom.xml index b22c751..0d3adc4 100644 --- a/serde/pom.xml +++ b/serde/pom.xml @@ -80,6 +80,11 @@ opencsv ${opencsv.version} + + com.twitter + parquet-hadoop-bundle + ${parquet.version} + diff --git a/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java b/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java index 616c6db..41a14c2 100644 --- a/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java +++ b/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java @@ -43,7 +43,8 @@ * The possible types for sargs. */ public static enum Type { - INTEGER, // all of the integer types + INTEGER, // all of the integer types except long + LONG, FLOAT, // float and double STRING, // string, char, varchar DATE, @@ -53,12 +54,20 @@ } /** + * File formats which support search arguments. + */ + public static enum FileFormat { + ORC, + PARQUET + } + + /** * Get the operator for the leaf. */ public Operator getOperator(); /** - * Get the type of the column and literal. + * Get the type of the column and literal, which may depend on the file format. */ public Type getType(); @@ -69,14 +78,17 @@ public String getColumnName(); /** - * Get the literal half of the predicate leaf. - * @return a Long, Double, or String + * Get the literal half of the predicate leaf. The original type is adapted to what the given file format needs. + * @return a Long, Double, or String for ORC, and an Int, Long, Double, or String for Parquet */ - public Object getLiteral(); + public Object getLiteral(FileFormat format); /** * For operators with multiple literals (IN and BETWEEN), get the literals.
- * @return the list of literals (Longs, Doubles, or Strings) + * + * @return the list of literals (Longs, Doubles, or Strings) for ORC, or the list of literals + * (Integers, Longs, Doubles, or Strings) for Parquet */ - public List getLiteralList(); + public List getLiteralList(FileFormat format); + } diff --git a/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java b/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java index db0f014..9be54da 100644 --- a/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java +++ b/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.io.sarg; +import parquet.filter2.predicate.FilterPredicate; + import java.util.List; /** @@ -175,6 +177,12 @@ public boolean isNeeded() { public String toKryo(); /** + * Translate the search argument to the filter predicate that Parquet uses. + * @return the equivalent Parquet FilterPredicate + */ + public FilterPredicate toFilterPredicate(); + + /** * A builder object for contexts outside of Hive where it isn't easy to * get a ExprNodeDesc. The user must call startOr, startAnd, or startNot * before adding any leaves. diff --git a/service/src/java/org/apache/hive/service/ServiceUtils.java b/service/src/java/org/apache/hive/service/ServiceUtils.java new file mode 100644 index 0000000..e712aaf --- /dev/null +++ b/service/src/java/org/apache/hive/service/ServiceUtils.java @@ -0,0 +1,44 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hive.service; + +public class ServiceUtils { + + /** + * Get the index separating the user name from the domain name (the user's name up + * to the first '/' or '@'). + * + * @param userName full user name. + * @return index of domain match or -1 if not found + */ + public static int indexOfDomainMatch(String userName) { + if (userName == null) { + return -1; + } + + int idx = userName.indexOf('/'); + int idx2 = userName.indexOf('@'); + int endIdx = Math.min(idx, idx2); // Use the earlier match. + // Unless at least one of '/' or '@' was not found, in + // which case, use the latter match.
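
The PredicateLeaf and SearchArgument hunks above make sarg literals format-aware and add a Parquet translation entry point. A minimal consumer sketch in Java, assuming a SearchArgument built elsewhere and its existing getLeaves() accessor; the class and variable names here are illustrative, not part of the patch:

    import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
    import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
    import parquet.filter2.predicate.FilterPredicate;

    public class SargFormatSketch {
      public static void dump(SearchArgument sarg) {
        for (PredicateLeaf leaf : sarg.getLeaves()) {
          // ORC widens integral literals to Long, while Parquet keeps the
          // original width, hence the new FileFormat parameter.
          Object forOrc = leaf.getLiteral(PredicateLeaf.FileFormat.ORC);
          Object forParquet = leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET);
          System.out.println(leaf.getColumnName() + " " + leaf.getOperator()
              + ": orc=" + forOrc + ", parquet=" + forParquet);
        }
        // New in this patch: translate the whole argument into Parquet's filter2 tree.
        FilterPredicate parquetFilter = sarg.toFilterPredicate();
        System.out.println("parquet filter: " + parquetFilter);
      }
    }
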
+ if (endIdx == -1) { + endIdx = Math.max(idx, idx2); + } + return endIdx; + } +} \ No newline at end of file diff --git a/service/src/java/org/apache/hive/service/auth/HiveAuthFactory.java b/service/src/java/org/apache/hive/service/auth/HiveAuthFactory.java index 23ba79c..8352951 100644 --- a/service/src/java/org/apache/hive/service/auth/HiveAuthFactory.java +++ b/service/src/java/org/apache/hive/service/auth/HiveAuthFactory.java @@ -37,7 +37,9 @@ import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge; import org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge.Server.ServerMode; +import org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.security.authorize.ProxyUsers; import org.apache.hive.service.cli.HiveSQLException; import org.apache.hive.service.cli.thrift.ThriftCLIService; import org.apache.thrift.TProcessorFactory; @@ -100,8 +102,7 @@ public HiveAuthFactory(HiveConf conf) throws TTransportException { if (authTypeStr == null) { authTypeStr = AuthTypes.NONE.getAuthName(); } - if (authTypeStr.equalsIgnoreCase(AuthTypes.KERBEROS.getAuthName()) - && ShimLoader.getHadoopShims().isSecureShimImpl()) { + if (authTypeStr.equalsIgnoreCase(AuthTypes.KERBEROS.getAuthName())) { saslServer = ShimLoader.getHadoopThriftAuthBridge() .createServer(conf.getVar(ConfVars.HIVE_SERVER2_KERBEROS_KEYTAB), conf.getVar(ConfVars.HIVE_SERVER2_KERBEROS_PRINCIPAL)); @@ -180,7 +181,7 @@ public static void loginFromKeytab(HiveConf hiveConf) throws IOException { if (principal.isEmpty() || keyTabFile.isEmpty()) { throw new IOException("HiveServer2 Kerberos principal or keytab is not correctly configured"); } else { - ShimLoader.getHadoopShims().loginUserFromKeytab(principal, keyTabFile); + UserGroupInformation.loginUserFromKeytab(SecurityUtil.getServerPrincipal(principal, "0.0.0.0"), keyTabFile); } } @@ -192,7 +193,7 @@ public static UserGroupInformation loginFromSpnegoKeytabAndReturnUGI(HiveConf hi if (principal.isEmpty() || keyTabFile.isEmpty()) { throw new IOException("HiveServer2 SPNEGO principal or keytab is not correctly configured"); } else { - return ShimLoader.getHadoopShims().loginUserFromKeytabAndReturnUGI(principal, keyTabFile); + return UserGroupInformation.loginUserFromKeytabAndReturnUGI(SecurityUtil.getServerPrincipal(principal, "0.0.0.0"), keyTabFile); } } @@ -328,16 +329,17 @@ public static void verifyProxyAccess(String realUser, String proxyUser, String i HiveConf hiveConf) throws HiveSQLException { try { UserGroupInformation sessionUgi; - if (ShimLoader.getHadoopShims().isSecurityEnabled()) { + if (UserGroupInformation.isSecurityEnabled()) { KerberosNameShim kerbName = ShimLoader.getHadoopShims().getKerberosNameShim(realUser); - String shortPrincipalName = kerbName.getServiceName(); - sessionUgi = ShimLoader.getHadoopShims().createProxyUser(shortPrincipalName); + sessionUgi = UserGroupInformation.createProxyUser( + kerbName.getServiceName(), UserGroupInformation.getLoginUser()); } else { - sessionUgi = ShimLoader.getHadoopShims().createRemoteUser(realUser, null); + sessionUgi = UserGroupInformation.createRemoteUser(realUser); } if (!proxyUser.equalsIgnoreCase(realUser)) { - ShimLoader.getHadoopShims(). 
- authorizeProxyAccess(proxyUser, sessionUgi, ipAddress, hiveConf); + ProxyUsers.refreshSuperUserGroupsConfiguration(hiveConf); + ProxyUsers.authorize(UserGroupInformation.createProxyUser(proxyUser, sessionUgi), + ipAddress, hiveConf); } } catch (IOException e) { throw new HiveSQLException( diff --git a/service/src/java/org/apache/hive/service/auth/LdapAuthenticationProviderImpl.java b/service/src/java/org/apache/hive/service/auth/LdapAuthenticationProviderImpl.java index d075761..7292cd9 100644 --- a/service/src/java/org/apache/hive/service/auth/LdapAuthenticationProviderImpl.java +++ b/service/src/java/org/apache/hive/service/auth/LdapAuthenticationProviderImpl.java @@ -24,6 +24,7 @@ import javax.security.sasl.AuthenticationException; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hive.service.ServiceUtils; public class LdapAuthenticationProviderImpl implements PasswdAuthenticationProvider { @@ -45,10 +46,11 @@ public void Authenticate(String user, String password) throws AuthenticationExce env.put(Context.INITIAL_CONTEXT_FACTORY, "com.sun.jndi.ldap.LdapCtxFactory"); env.put(Context.PROVIDER_URL, ldapURL); - // If the domain is supplied, then append it. LDAP providers like Active Directory - // use a fully qualified user name like foo@bar.com. - if (ldapDomain != null) { - user = user + "@" + ldapDomain; + // If the domain is available in the config, then append it unless domain is + // already part of the username. LDAP providers like Active Directory use a + // fully qualified user name like foo@bar.com. + if (!hasDomain(user) && ldapDomain != null) { + user = user + "@" + ldapDomain; } // setup the security principal @@ -71,4 +73,7 @@ public void Authenticate(String user, String password) throws AuthenticationExce } } + private boolean hasDomain(String userName) { + return (ServiceUtils.indexOfDomainMatch(userName) > 0); + } } diff --git a/service/src/java/org/apache/hive/service/cli/CLIService.java b/service/src/java/org/apache/hive/service/cli/CLIService.java index f5751f1..3f8aa0b 100644 --- a/service/src/java/org/apache/hive/service/cli/CLIService.java +++ b/service/src/java/org/apache/hive/service/cli/CLIService.java @@ -34,9 +34,11 @@ import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; import org.apache.hadoop.hive.metastore.IMetaStoreClient; +import org.apache.hadoop.hive.ql.exec.FunctionRegistry; import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hive.service.CompositeService; import org.apache.hive.service.ServiceException; @@ -46,6 +48,9 @@ import org.apache.hive.service.cli.thrift.TProtocolVersion; import org.apache.hive.service.server.HiveServer2; +import com.google.common.base.Splitter; +import com.google.common.collect.Lists; + /** * CLIService. 
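
The verifyProxyAccess hunk above replaces the shim indirection with direct Hadoop security calls. A condensed, self-contained sketch of the resulting check, using only the UserGroupInformation and ProxyUsers calls that appear in the patch (the method and variable names are illustrative):

    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.security.UserGroupInformation;
    import org.apache.hadoop.security.authorize.ProxyUsers;

    public class ProxyCheckSketch {
      public static void check(String realUser, String proxyUser, String ipAddress,
          Configuration conf) throws IOException {
        // In a secure cluster the session user is a proxy of the service's login
        // user; otherwise a plain remote user is enough.
        UserGroupInformation sessionUgi = UserGroupInformation.isSecurityEnabled()
            ? UserGroupInformation.createProxyUser(realUser, UserGroupInformation.getLoginUser())
            : UserGroupInformation.createRemoteUser(realUser);
        if (!proxyUser.equalsIgnoreCase(realUser)) {
          // Re-read the hadoop.proxyuser.* rules, then authorize; a denial
          // surfaces as an AuthorizationException (a subclass of IOException).
          ProxyUsers.refreshSuperUserGroupsConfiguration(conf);
          ProxyUsers.authorize(UserGroupInformation.createProxyUser(proxyUser, sessionUgi),
              ipAddress, conf);
        }
      }
    }
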
* @@ -79,10 +84,10 @@ public synchronized void init(HiveConf hiveConf) { sessionManager = new SessionManager(hiveServer2); addService(sessionManager); // If the hadoop cluster is secure, do a kerberos login for the service from the keytab - if (ShimLoader.getHadoopShims().isSecurityEnabled()) { + if (UserGroupInformation.isSecurityEnabled()) { try { HiveAuthFactory.loginFromKeytab(hiveConf); - this.serviceUGI = ShimLoader.getHadoopShims().getUGIForConf(hiveConf); + this.serviceUGI = Utils.getUGIForConf(hiveConf); } catch (IOException e) { throw new ServiceException("Unable to login to kerberos with given principal/keytab", e); } catch (LoginException e) { @@ -104,9 +109,16 @@ public synchronized void init(HiveConf hiveConf) { } } } + setupBlockedUdfs(); super.init(hiveConf); } + private void setupBlockedUdfs() { + FunctionRegistry.setupPermissionsForBuiltinUDFs( + hiveConf.getVar(ConfVars.HIVE_SERVER2_BUILTIN_UDF_WHITELIST), + hiveConf.getVar(ConfVars.HIVE_SERVER2_BUILTIN_UDF_BLACKLIST)); + } + public UserGroupInformation getServiceUGI() { return this.serviceUGI; } diff --git a/service/src/java/org/apache/hive/service/cli/operation/SQLOperation.java b/service/src/java/org/apache/hive/service/cli/operation/SQLOperation.java index 8cabf7e..c9485b6 100644 --- a/service/src/java/org/apache/hive/service/cli/operation/SQLOperation.java +++ b/service/src/java/org/apache/hive/service/cli/operation/SQLOperation.java @@ -51,6 +51,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hive.service.cli.FetchOrientation; @@ -205,7 +206,7 @@ public Object run() throws HiveSQLException { }; try { - ShimLoader.getHadoopShims().doAs(currentUGI, doAsAction); + currentUGI.doAs(doAsAction); } catch (Exception e) { setOperationException(new HiveSQLException(e)); LOG.error("Error running hive query as user : " + currentUGI.getShortUserName(), e); @@ -245,7 +246,7 @@ public Object run() throws HiveSQLException { */ private UserGroupInformation getCurrentUGI(HiveConf opConfig) throws HiveSQLException { try { - return ShimLoader.getHadoopShims().getUGIForConf(opConfig); + return Utils.getUGIForConf(opConfig); } catch (Exception e) { throw new HiveSQLException("Unable to get current user", e); } diff --git a/service/src/java/org/apache/hive/service/cli/session/HiveSessionImplwithUGI.java b/service/src/java/org/apache/hive/service/cli/session/HiveSessionImplwithUGI.java index bad533b..7acd113 100644 --- a/service/src/java/org/apache/hive/service/cli/session/HiveSessionImplwithUGI.java +++ b/service/src/java/org/apache/hive/service/cli/session/HiveSessionImplwithUGI.java @@ -20,10 +20,14 @@ import java.io.IOException; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hive.service.auth.HiveAuthFactory; import org.apache.hive.service.cli.HiveSQLException; @@ -41,6 +45,7 @@ private String delegationTokenStr = null; private Hive 
sessionHive = null; private HiveSession proxySession = null; + static final Log LOG = LogFactory.getLog(HiveSessionImplwithUGI.class); public HiveSessionImplwithUGI(TProtocolVersion protocol, String username, String password, HiveConf hiveConf, String ipAddress, String delegationToken) throws HiveSQLException { @@ -62,14 +67,15 @@ public void setSessionUGI(String owner) throws HiveSQLException { if (owner == null) { throw new HiveSQLException("No username provided for impersonation"); } - if (ShimLoader.getHadoopShims().isSecurityEnabled()) { + if (UserGroupInformation.isSecurityEnabled()) { try { - sessionUgi = ShimLoader.getHadoopShims().createProxyUser(owner); + sessionUgi = UserGroupInformation.createProxyUser( + owner, UserGroupInformation.getLoginUser()); } catch (IOException e) { throw new HiveSQLException("Couldn't setup proxy user", e); } } else { - sessionUgi = ShimLoader.getHadoopShims().createRemoteUser(owner, null); + sessionUgi = UserGroupInformation.createRemoteUser(owner); } } @@ -98,8 +104,10 @@ protected synchronized void acquire(boolean userAccess) { public void close() throws HiveSQLException { try { acquire(true); - ShimLoader.getHadoopShims().closeAllForUGI(sessionUgi); + FileSystem.closeAllForUGI(sessionUgi); cancelDelegationToken(); + } catch (IOException ioe) { + LOG.error("Could not clean up file-system handles for UGI: " + sessionUgi, ioe); } finally { release(true); super.close(); @@ -118,7 +126,7 @@ private void setDelegationToken(String delegationTokenStr) throws HiveSQLExcepti if (delegationTokenStr != null) { getHiveConf().set("hive.metastore.token.signature", HS2TOKEN); try { - ShimLoader.getHadoopShims().setTokenStr(sessionUgi, delegationTokenStr, HS2TOKEN); + Utils.setTokenStr(sessionUgi, delegationTokenStr, HS2TOKEN); } catch (IOException e) { throw new HiveSQLException("Couldn't setup delegation token in the ugi", e); } diff --git a/service/src/java/org/apache/hive/service/cli/session/HiveSessionProxy.java b/service/src/java/org/apache/hive/service/cli/session/HiveSessionProxy.java index 0e6ea63..5b10521 100644 --- a/service/src/java/org/apache/hive/service/cli/session/HiveSessionProxy.java +++ b/service/src/java/org/apache/hive/service/cli/session/HiveSessionProxy.java @@ -30,7 +30,6 @@ import java.security.PrivilegedActionException; import java.security.PrivilegedExceptionAction; -import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hive.service.cli.HiveSQLException; @@ -57,7 +56,7 @@ public Object invoke(Object arg0, final Method method, final Object[] args) if (method.getDeclaringClass() == HiveSessionBase.class) { return invoke(method, args); } - return ShimLoader.getHadoopShims().doAs(ugi, + return ugi.doAs( new PrivilegedExceptionAction () { @Override public Object run() throws HiveSQLException { diff --git a/service/src/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java b/service/src/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java index dc68efb..b6e851a 100644 --- a/service/src/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java +++ b/service/src/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java @@ -78,10 +78,16 @@ public void run() { // Server args int maxMessageSize = hiveConf.getIntVar(HiveConf.ConfVars.HIVE_SERVER2_THRIFT_MAX_MESSAGE_SIZE); + int requestTimeout = (int) hiveConf.getTimeVar( + HiveConf.ConfVars.HIVE_SERVER2_THRIFT_LOGIN_TIMEOUT, TimeUnit.SECONDS); + int beBackoffSlotLength = (int) 
hiveConf.getTimeVar( + HiveConf.ConfVars.HIVE_SERVER2_THRIFT_LOGIN_BEBACKOFF_SLOT_LENGTH, TimeUnit.MILLISECONDS); TThreadPoolServer.Args sargs = new TThreadPoolServer.Args(serverSocket) .processorFactory(processorFactory).transportFactory(transportFactory) .protocolFactory(new TBinaryProtocol.Factory()) - .inputProtocolFactory(new TBinaryProtocol.Factory(true, true, maxMessageSize)) + .inputProtocolFactory(new TBinaryProtocol.Factory(true, true, maxMessageSize, maxMessageSize)) + .requestTimeout(requestTimeout).requestTimeoutUnit(TimeUnit.SECONDS) + .beBackoffSlotLength(beBackoffSlotLength).beBackoffSlotLengthUnit(TimeUnit.MILLISECONDS) .executorService(executorService); // TCP Server diff --git a/service/src/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java b/service/src/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java index 3a8ae70..3345b5f 100644 --- a/service/src/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java +++ b/service/src/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java @@ -33,6 +33,7 @@ import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hive.service.AbstractService; import org.apache.hive.service.ServiceException; +import org.apache.hive.service.ServiceUtils; import org.apache.hive.service.auth.HiveAuthFactory; import org.apache.hive.service.auth.TSetIpAddressProcessor; import org.apache.hive.service.cli.*; @@ -295,11 +296,24 @@ private String getUserName(TOpenSessionReq req) throws HiveSQLException { if (userName == null) { userName = req.getUsername(); } + + userName = getShortName(userName); String effectiveClientUser = getProxyUser(userName, req.getConfiguration(), getIpAddress()); LOG.debug("Client's username: " + effectiveClientUser); return effectiveClientUser; } + private String getShortName(String userName) { + String ret = null; + if (userName != null) { + int indexOfDomainMatch = ServiceUtils.indexOfDomainMatch(userName); + ret = (indexOfDomainMatch <= 0) ? 
userName : + userName.substring(0, indexOfDomainMatch); + } + + return ret; + } + /** * Create a session handle * @param req diff --git a/service/src/java/org/apache/hive/service/server/HiveServer2.java b/service/src/java/org/apache/hive/service/server/HiveServer2.java index 4503471..21025a2 100644 --- a/service/src/java/org/apache/hive/service/server/HiveServer2.java +++ b/service/src/java/org/apache/hive/service/server/HiveServer2.java @@ -44,6 +44,8 @@ import org.apache.hadoop.hive.ql.exec.tez.TezSessionPoolManager; import org.apache.hadoop.hive.ql.util.ZooKeeperHiveHelper; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.Utils; +import org.apache.hadoop.security.UserGroupInformation; import org.apache.hive.common.util.HiveStringUtils; import org.apache.hive.common.util.HiveVersionInfo; import org.apache.hive.service.CompositeService; @@ -118,7 +120,7 @@ public static boolean isHTTPTransportMode(HiveConf hiveConf) { @Override public List getDefaultAcl() { - if (ShimLoader.getHadoopShims().isSecurityEnabled()) { + if (UserGroupInformation.isSecurityEnabled()) { // Read all to the world nodeAcls.addAll(Ids.READ_ACL_UNSAFE); // Create/Delete/Write/Admin to the authenticated user @@ -198,7 +200,7 @@ private void addServerInstanceToZooKeeper(HiveConf hiveConf) throws Exception { * @throws Exception */ private void setUpZooKeeperAuth(HiveConf hiveConf) throws Exception { - if (ShimLoader.getHadoopShims().isSecurityEnabled()) { + if (UserGroupInformation.isSecurityEnabled()) { String principal = hiveConf.getVar(ConfVars.HIVE_SERVER2_KERBEROS_PRINCIPAL); if (principal.isEmpty()) { throw new IOException("HiveServer2 Kerberos principal is empty"); @@ -208,7 +210,7 @@ private void setUpZooKeeperAuth(HiveConf hiveConf) throws Exception { throw new IOException("HiveServer2 Kerberos keytab is empty"); } // Install the JAAS Configuration for the runtime - ShimLoader.getHadoopShims().setZookeeperClientKerberosJaasConfig(principal, keyTabFile); + Utils.setZookeeperClientKerberosJaasConfig(principal, keyTabFile); } } diff --git a/shims/0.20/pom.xml b/shims/0.20/pom.xml deleted file mode 100644 index 0f6eb17..0000000 --- a/shims/0.20/pom.xml +++ /dev/null @@ -1,63 +0,0 @@ - - - - 4.0.0 - - org.apache.hive - hive - 0.15.0-SNAPSHOT - ../../pom.xml - - - org.apache.hive.shims - hive-shims-0.20 - jar - Hive Shims 0.20 - - - ../.. - - - - - - - org.apache.hive.shims - hive-shims-common - ${project.version} - - - - org.apache.hadoop - hadoop-core - ${hadoop-20.version} - true - - - org.apache.hadoop - hadoop-test - ${hadoop-20.version} - true - - - org.apache.hadoop - hadoop-tools - ${hadoop-20.version} - true - - - diff --git a/shims/0.20/src/main/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java b/shims/0.20/src/main/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java deleted file mode 100644 index 0700099..0000000 --- a/shims/0.20/src/main/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java +++ /dev/null @@ -1,964 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.shims; - -import java.io.DataInput; -import java.io.DataOutput; -import java.io.IOException; -import java.lang.reflect.Constructor; -import java.net.MalformedURLException; -import java.net.URI; -import java.net.URISyntaxException; -import java.net.URL; -import java.security.AccessControlException; -import java.security.PrivilegedActionException; -import java.security.PrivilegedExceptionAction; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.TreeMap; - -import javax.security.auth.Subject; -import javax.security.auth.login.LoginException; - -import org.apache.commons.lang.ArrayUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.BlockLocation; -import org.apache.hadoop.fs.DefaultFileAccess; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.FsShell; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.PathFilter; -import org.apache.hadoop.fs.ProxyFileSystem; -import org.apache.hadoop.fs.Trash; -import org.apache.hadoop.fs.permission.FsAction; -import org.apache.hadoop.fs.permission.FsPermission; -import org.apache.hadoop.hdfs.MiniDFSCluster; -import org.apache.hadoop.hive.io.HiveIOExceptionHandlerUtil; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.mapred.ClusterStatus; -import org.apache.hadoop.mapred.FileInputFormat; -import org.apache.hadoop.mapred.InputSplit; -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapred.JobContext; -import org.apache.hadoop.mapred.MiniMRCluster; -import org.apache.hadoop.mapred.OutputCommitter; -import org.apache.hadoop.mapred.RecordReader; -import org.apache.hadoop.mapred.Reporter; -import org.apache.hadoop.mapred.TaskAttemptContext; -import org.apache.hadoop.mapred.TaskLogServlet; -import org.apache.hadoop.mapred.lib.CombineFileInputFormat; -import org.apache.hadoop.mapred.lib.CombineFileSplit; -import org.apache.hadoop.mapred.lib.TotalOrderPartitioner; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.JobID; -import org.apache.hadoop.mapreduce.TaskAttemptID; -import org.apache.hadoop.security.SecurityUtil; -import org.apache.hadoop.security.UnixUserGroupInformation; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hadoop.tools.HadoopArchives; -import org.apache.hadoop.util.Progressable; -import org.apache.hadoop.util.ToolRunner; -import org.apache.hadoop.util.VersionInfo; - -public class Hadoop20Shims implements HadoopShims { - - /** - * Returns a shim to wrap MiniMrCluster - */ - @Override - public MiniMrShim getMiniMrCluster(Configuration conf, int numberOfTaskTrackers, - String nameNode, int numDir) throws IOException { - return new MiniMrShim(conf, numberOfTaskTrackers, nameNode, numDir); - } - - @Override - 
public MiniMrShim getMiniTezCluster(Configuration conf, int numberOfTaskTrackers, - String nameNode, int numDir) throws IOException { - throw new IOException("Cannot run tez on current hadoop, Version: " + VersionInfo.getVersion()); - } - - /** - * Shim for MiniMrCluster - */ - public class MiniMrShim implements HadoopShims.MiniMrShim { - - private final MiniMRCluster mr; - - public MiniMrShim(Configuration conf, int numberOfTaskTrackers, - String nameNode, int numDir) throws IOException { - this.mr = new MiniMRCluster(numberOfTaskTrackers, nameNode, numDir); - } - - @Override - public int getJobTrackerPort() throws UnsupportedOperationException { - return mr.getJobTrackerPort(); - } - - @Override - public void shutdown() throws IOException { - mr.shutdown(); - } - - @Override - public void setupConfiguration(Configuration conf) { - setJobLauncherRpcAddress(conf, "localhost:" + mr.getJobTrackerPort()); - } - } - - @Override - public HadoopShims.MiniDFSShim getMiniDfs(Configuration conf, - int numDataNodes, - boolean format, - String[] racks) throws IOException { - return new MiniDFSShim(new MiniDFSCluster(conf, numDataNodes, format, racks)); - } - - /** - * MiniDFSShim. - * - */ - public class MiniDFSShim implements HadoopShims.MiniDFSShim { - private final MiniDFSCluster cluster; - - public MiniDFSShim(MiniDFSCluster cluster) { - this.cluster = cluster; - } - - @Override - public FileSystem getFileSystem() throws IOException { - return cluster.getFileSystem(); - } - - @Override - public void shutdown() { - cluster.shutdown(); - } - } - - @Override - public HadoopShims.CombineFileInputFormatShim getCombineFileInputFormat() { - return new CombineFileInputFormatShim() { - @Override - public RecordReader getRecordReader(InputSplit split, - JobConf job, Reporter reporter) throws IOException { - throw new IOException("CombineFileInputFormat.getRecordReader not needed."); - } - }; - } - - @Override - public void setTotalOrderPartitionFile(JobConf jobConf, Path partitionFile){ - TotalOrderPartitioner.setPartitionFile(jobConf, partitionFile); - } - - @Override - public Comparator getLongComparator() { - return new Comparator() { - @Override - public int compare(LongWritable o1, LongWritable o2) { - return o1.compareTo(o2); - } - }; - } - - public static class InputSplitShim extends CombineFileSplit implements HadoopShims.InputSplitShim { - long shrinkedLength; - boolean _isShrinked; - public InputSplitShim() { - super(); - _isShrinked = false; - } - - public InputSplitShim(CombineFileSplit old) throws IOException { - super(old.getJob(), old.getPaths(), old.getStartOffsets(), - old.getLengths(), dedup(old.getLocations())); - _isShrinked = false; - } - - private static String[] dedup(String[] locations) { - Set dedup = new HashSet(); - Collections.addAll(dedup, locations); - return dedup.toArray(new String[dedup.size()]); - } - - @Override - public void shrinkSplit(long length) { - _isShrinked = true; - shrinkedLength = length; - } - - public boolean isShrinked() { - return _isShrinked; - } - - public long getShrinkedLength() { - return shrinkedLength; - } - - @Override - public void readFields(DataInput in) throws IOException { - super.readFields(in); - _isShrinked = in.readBoolean(); - if (_isShrinked) { - shrinkedLength = in.readLong(); - } - } - - @Override - public void write(DataOutput out) throws IOException { - super.write(out); - out.writeBoolean(_isShrinked); - if (_isShrinked) { - out.writeLong(shrinkedLength); - } - } - } - - /* This class should be replaced with 
org.apache.hadoop.mapred.lib.CombineFileRecordReader class, once
-   * https://issues.apache.org/jira/browse/MAPREDUCE-955 is fixed. This code should be removed - it is a copy
-   * of org.apache.hadoop.mapred.lib.CombineFileRecordReader
-   */
-  public static class CombineFileRecordReader<K, V> implements RecordReader<K, V> {
-
-    static final Class[] constructorSignature = new Class[] {
-        InputSplit.class,
-        Configuration.class,
-        Reporter.class,
-        Integer.class
-        };
-
-    protected CombineFileSplit split;
-    protected JobConf jc;
-    protected Reporter reporter;
-    protected Class<RecordReader<K, V>> rrClass;
-    protected Constructor<RecordReader<K, V>> rrConstructor;
-    protected FileSystem fs;
-
-    protected int idx;
-    protected long progress;
-    protected RecordReader<K, V> curReader;
-    protected boolean isShrinked;
-    protected long shrinkedLength;
-
-    @Override
-    public boolean next(K key, V value) throws IOException {
-
-      while ((curReader == null)
-          || !doNextWithExceptionHandler((K) ((CombineHiveKey) key).getKey(),
-              value)) {
-        if (!initNextRecordReader(key)) {
-          return false;
-        }
-      }
-      return true;
-    }
-
-    @Override
-    public K createKey() {
-      K newKey = curReader.createKey();
-      return (K)(new CombineHiveKey(newKey));
-    }
-
-    @Override
-    public V createValue() {
-      return curReader.createValue();
-    }
-
-    /**
-     * Return the amount of data processed.
-     */
-    @Override
-    public long getPos() throws IOException {
-      return progress;
-    }
-
-    @Override
-    public void close() throws IOException {
-      if (curReader != null) {
-        curReader.close();
-        curReader = null;
-      }
-    }
-
-    /**
-     * Return progress based on the amount of data processed so far.
-     */
-    @Override
-    public float getProgress() throws IOException {
-      long subprogress = 0;    // bytes processed in current split
-      if (null != curReader) {
-        // idx is always one past the current subsplit's true index.
-        subprogress = (long)(curReader.getProgress() * split.getLength(idx - 1));
-      }
-      return Math.min(1.0f, (progress + subprogress) / (float) (split.getLength()));
-    }
-
-    /**
-     * A generic RecordReader that can hand out different recordReaders
-     * for each chunk in the CombineFileSplit.
-     */
-    public CombineFileRecordReader(JobConf job, CombineFileSplit split,
-        Reporter reporter,
-        Class<RecordReader<K, V>> rrClass)
-        throws IOException {
-      this.split = split;
-      this.jc = job;
-      this.rrClass = rrClass;
-      this.reporter = reporter;
-      this.idx = 0;
-      this.curReader = null;
-      this.progress = 0;
-
-      isShrinked = false;
-
-      assert (split instanceof InputSplitShim);
-      if (((InputSplitShim) split).isShrinked()) {
-        isShrinked = true;
-        shrinkedLength = ((InputSplitShim) split).getShrinkedLength();
-      }
-
-      try {
-        rrConstructor = rrClass.getDeclaredConstructor(constructorSignature);
-        rrConstructor.setAccessible(true);
-      } catch (Exception e) {
-        throw new RuntimeException(rrClass.getName() +
-            " does not have valid constructor", e);
-      }
-      initNextRecordReader(null);
-    }
-
-    /**
-     * Do next and handle any exception inside it.
-     * @param key
-     * @param value
-     * @return
-     * @throws IOException
-     */
-    private boolean doNextWithExceptionHandler(K key, V value) throws IOException {
-      try {
-        return curReader.next(key, value);
-      } catch (Exception e) {
-        return HiveIOExceptionHandlerUtil.handleRecordReaderNextException(e, jc);
-      }
-    }
-
-    /**
-     * Get the record reader for the next chunk in this CombineFileSplit.
- */ - protected boolean initNextRecordReader(K key) throws IOException { - - if (curReader != null) { - curReader.close(); - curReader = null; - if (idx > 0) { - progress += split.getLength(idx - 1); // done processing so far - } - } - - // if all chunks have been processed or reached the length, nothing more to do. - if (idx == split.getNumPaths() || (isShrinked && progress > shrinkedLength)) { - return false; - } - - // get a record reader for the idx-th chunk - try { - curReader = rrConstructor.newInstance(new Object[] - {split, jc, reporter, Integer.valueOf(idx)}); - - // change the key if need be - if (key != null) { - K newKey = curReader.createKey(); - ((CombineHiveKey)key).setKey(newKey); - } - - // setup some helper config variables. - jc.set("map.input.file", split.getPath(idx).toString()); - jc.setLong("map.input.start", split.getOffset(idx)); - jc.setLong("map.input.length", split.getLength(idx)); - } catch (Exception e) { - curReader=HiveIOExceptionHandlerUtil.handleRecordReaderCreationException(e, jc); - } - idx++; - return true; - } - } - - public abstract static class CombineFileInputFormatShim extends - CombineFileInputFormat - implements HadoopShims.CombineFileInputFormatShim { - - @Override - public Path[] getInputPathsShim(JobConf conf) { - try { - return FileInputFormat.getInputPaths(conf); - } catch (Exception e) { - throw new RuntimeException(e); - } - } - - @Override - public void createPool(JobConf conf, PathFilter... filters) { - super.createPool(conf, filters); - } - - @Override - public InputSplitShim[] getSplits(JobConf job, int numSplits) throws IOException { - long minSize = job.getLong("mapred.min.split.size", 0); - - // For backward compatibility, let the above parameter be used - if (job.getLong("mapred.min.split.size.per.node", 0) == 0) { - super.setMinSplitSizeNode(minSize); - } - - if (job.getLong("mapred.min.split.size.per.rack", 0) == 0) { - super.setMinSplitSizeRack(minSize); - } - - if (job.getLong("mapred.max.split.size", 0) == 0) { - super.setMaxSplitSize(minSize); - } - - CombineFileSplit[] splits = (CombineFileSplit[]) super.getSplits(job, numSplits); - - InputSplitShim[] isplits = new InputSplitShim[splits.length]; - for (int pos = 0; pos < splits.length; pos++) { - isplits[pos] = new InputSplitShim(splits[pos]); - } - - return isplits; - } - - @Override - public InputSplitShim getInputSplitShim() throws IOException { - return new InputSplitShim(); - } - - @Override - public RecordReader getRecordReader(JobConf job, HadoopShims.InputSplitShim split, - Reporter reporter, - Class> rrClass) - throws IOException { - CombineFileSplit cfSplit = (CombineFileSplit) split; - return new CombineFileRecordReader(job, cfSplit, reporter, rrClass); - } - - } - - @Override - public String getInputFormatClassName() { - return "org.apache.hadoop.hive.ql.io.CombineHiveInputFormat"; - } - - String[] ret = new String[2]; - - @Override - public int createHadoopArchive(Configuration conf, Path sourceDir, Path destDir, - String archiveName) throws Exception { - - HadoopArchives har = new HadoopArchives(conf); - List args = new ArrayList(); - - args.add("-archiveName"); - args.add(archiveName); - args.add(sourceDir.toString()); - args.add(destDir.toString()); - - return ToolRunner.run(har, args.toArray(new String[0])); - } - - /* - *(non-Javadoc) - * @see org.apache.hadoop.hive.shims.HadoopShims#getHarUri(java.net.URI, java.net.URI, java.net.URI) - * This particular instance is for Hadoop 20 which creates an archive - * with the entire directory path from which one 
created the archive as
-   * compared against the one used by Hadoop 1.0 (within HadoopShimsSecure)
-   * where a relative path is stored within the archive.
-   */
-  @Override
-  public URI getHarUri(URI original, URI base, URI originalBase)
-      throws URISyntaxException {
-    URI relative = null;
-
-    String dirInArchive = original.getPath();
-    if (dirInArchive.length() > 1 && dirInArchive.charAt(0) == '/') {
-      dirInArchive = dirInArchive.substring(1);
-    }
-
-    relative = new URI(null, null, dirInArchive, null);
-
-    return base.resolve(relative);
-  }
-
-  public static class NullOutputCommitter extends OutputCommitter {
-    @Override
-    public void setupJob(JobContext jobContext) { }
-    @Override
-    public void cleanupJob(JobContext jobContext) { }
-
-    @Override
-    public void setupTask(TaskAttemptContext taskContext) { }
-    @Override
-    public boolean needsTaskCommit(TaskAttemptContext taskContext) {
-      return false;
-    }
-    @Override
-    public void commitTask(TaskAttemptContext taskContext) { }
-    @Override
-    public void abortTask(TaskAttemptContext taskContext) { }
-  }
-
-  @Override
-  public void prepareJobOutput(JobConf conf) {
-    conf.setOutputCommitter(Hadoop20Shims.NullOutputCommitter.class);
-
-    // option to bypass job setup and cleanup was introduced in hadoop-21 (MAPREDUCE-463)
-    // but can be backported. So we disable setup/cleanup in all versions >= 0.19
-    conf.setBoolean("mapred.committer.job.setup.cleanup.needed", false);
-
-    // option to bypass task cleanup task was introduced in hadoop-23 (MAPREDUCE-2206)
-    // but can be backported. So we disable setup/cleanup in all versions >= 0.19
-    conf.setBoolean("mapreduce.job.committer.task.cleanup.needed", false);
-  }
-
-  @Override
-  public UserGroupInformation getUGIForConf(Configuration conf) throws LoginException {
-    UserGroupInformation ugi =
-        UnixUserGroupInformation.readFromConf(conf, UnixUserGroupInformation.UGI_PROPERTY_NAME);
-    if (ugi == null) {
-      ugi = UserGroupInformation.login(conf);
-    }
-    return ugi;
-  }
-
-  @Override
-  public boolean isSecureShimImpl() {
-    return false;
-  }
-
-  @Override
-  public String getShortUserName(UserGroupInformation ugi) {
-    return ugi.getUserName();
-  }
-
-  @Override
-  public String getTokenStrForm(String tokenSignature) throws IOException {
-    throw new UnsupportedOperationException("Tokens are not supported in current hadoop version");
-  }
-
-  @Override
-  public void setTokenStr(UserGroupInformation ugi, String tokenStr, String tokenService)
-      throws IOException {
-    throw new UnsupportedOperationException("Tokens are not supported in current hadoop version");
-  }
-
-  @Override
-  public String addServiceToToken(String tokenStr, String tokenService) throws IOException {
-    throw new UnsupportedOperationException("Tokens are not supported in current hadoop version");
-  }
-
-
-  @Override
-  public <T> T doAs(UserGroupInformation ugi, PrivilegedExceptionAction<T> pvea) throws
-      IOException, InterruptedException {
-    try {
-      return Subject.doAs(SecurityUtil.getSubject(ugi), pvea);
-    } catch (PrivilegedActionException e) {
-      throw new IOException(e);
-    }
-  }
-
-  @Override
-  public Path createDelegationTokenFile(Configuration conf) throws IOException {
-    throw new UnsupportedOperationException("Tokens are not supported in current hadoop version");
-  }
-
-  @Override
-  public UserGroupInformation createRemoteUser(String userName, List<String> groupNames) {
-    if (groupNames.isEmpty()) {
-      groupNames = new ArrayList<String>();
-      groupNames.add(userName);
-    }
-    return new UnixUserGroupInformation(userName, groupNames.toArray(new String[0]));
-  }
-
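/*
 * A rough, self-contained sketch of the getHarUri contrast described above:
 * the deleted Hadoop 0.20 variant keeps the archived directory's full path
 * inside the archive, while the Hadoop 1.0-style variant kept in
 * HadoopShimsSecure stores only the path relative to the archive root.
 * The paths below are made up for illustration.
 *
 *   URI original     = new URI("hdfs:///user/hive/warehouse/t/part-0");
 *   URI originalBase = new URI("hdfs:///user/hive/warehouse/t");
 *   URI base         = new URI("har:///user/hive/warehouse/t.har/");
 *
 *   // Hadoop 0.20 style: strip the leading '/' and resolve the full path.
 *   String dir = original.getPath().substring(1);
 *   base.resolve(new URI(null, null, dir, null));
 *   // -> har:///user/hive/warehouse/t.har/user/hive/warehouse/t/part-0
 *
 *   // Hadoop 1.0 style (HadoopShimsSecure): resolve only the relative part.
 *   base.resolve(originalBase.relativize(original));
 *   // -> har:///user/hive/warehouse/t.har/part-0
 */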
@Override - public void loginUserFromKeytab(String principal, String keytabFile) throws IOException { - throwKerberosUnsupportedError(); - } - - @Override - public UserGroupInformation loginUserFromKeytabAndReturnUGI( - String principal, String keytabFile) throws IOException { - throwKerberosUnsupportedError(); - return null; - } - - @Override - public String getResolvedPrincipal(String principal) throws IOException { - // Not supported - return null; - } - - @Override - public void reLoginUserFromKeytab() throws IOException{ - throwKerberosUnsupportedError(); - } - - @Override - public boolean isLoginKeytabBased() throws IOException { - return false; - } - - private void throwKerberosUnsupportedError() throws UnsupportedOperationException{ - throw new UnsupportedOperationException("Kerberos login is not supported" + - " in this hadoop version (" + VersionInfo.getVersion() + ")"); - } - - @Override - public UserGroupInformation createProxyUser(String userName) throws IOException { - return createRemoteUser(userName, null); - } - - @Override - public List listLocatedStatus(final FileSystem fs, - final Path path, - final PathFilter filter - ) throws IOException { - return Arrays.asList(fs.listStatus(path, filter)); - } - - @Override - public BlockLocation[] getLocations(FileSystem fs, - FileStatus status) throws IOException { - return fs.getFileBlockLocations(status, 0, status.getLen()); - } - - @Override - public TreeMap getLocationsWithOffset(FileSystem fs, - FileStatus status) throws IOException { - TreeMap offsetBlockMap = new TreeMap(); - BlockLocation[] locations = getLocations(fs, status); - for (BlockLocation location : locations) { - offsetBlockMap.put(location.getOffset(), location); - } - return offsetBlockMap; - } - - @Override - public void hflush(FSDataOutputStream stream) throws IOException { - stream.sync(); - } - - @Override - public HdfsFileStatus getFullFileStatus(Configuration conf, FileSystem fs, Path file) - throws IOException { - return new Hadoop20FileStatus(fs.getFileStatus(file)); - } - - @Override - public void setFullFileStatus(Configuration conf, HdfsFileStatus sourceStatus, - FileSystem fs, Path target) throws IOException { - String group = sourceStatus.getFileStatus().getGroup(); - String permission = Integer.toString(sourceStatus.getFileStatus().getPermission().toShort(), 8); - //use FsShell to change group and permissions recursively - try { - FsShell fshell = new FsShell(); - fshell.setConf(conf); - run(fshell, new String[]{"-chgrp", "-R", group, target.toString()}); - run(fshell, new String[]{"-chmod", "-R", permission, target.toString()}); - } catch (Exception e) { - throw new IOException("Unable to set permissions of " + target, e); - } - try { - if (LOG.isDebugEnabled()) { //some trace logging - getFullFileStatus(conf, fs, target).debugLog(); - } - } catch (Exception e) { - //ignore. 
- } - } - - public class Hadoop20FileStatus implements HdfsFileStatus { - private final FileStatus fileStatus; - public Hadoop20FileStatus(FileStatus fileStatus) { - this.fileStatus = fileStatus; - } - @Override - public FileStatus getFileStatus() { - return fileStatus; - } - @Override - public void debugLog() { - if (fileStatus != null) { - LOG.debug(fileStatus.toString()); - } - } - } - - @Override - public void authorizeProxyAccess(String proxyUser, UserGroupInformation realUserUgi, - String ipAddress, Configuration conf) throws IOException { - // This hadoop version doesn't have proxy verification - } - - @Override - public boolean isSecurityEnabled() { - return false; - } - - @Override - public String getTaskAttemptLogUrl(JobConf conf, - String taskTrackerHttpAddress, String taskAttemptId) - throws MalformedURLException { - URL taskTrackerHttpURL = new URL(taskTrackerHttpAddress); - return TaskLogServlet.getTaskLogUrl( - taskTrackerHttpURL.getHost(), - Integer.toString(taskTrackerHttpURL.getPort()), - taskAttemptId); - } - - @Override - public JobTrackerState getJobTrackerState(ClusterStatus clusterStatus) throws Exception { - switch (clusterStatus.getJobTrackerState()) { - case INITIALIZING: - return JobTrackerState.INITIALIZING; - case RUNNING: - return JobTrackerState.RUNNING; - default: - String errorMsg = "Unrecognized JobTracker state: " + clusterStatus.getJobTrackerState(); - throw new Exception(errorMsg); - } - } - - @Override - public String unquoteHtmlChars(String item) { - return item; - } - - - @Override - public org.apache.hadoop.mapreduce.TaskAttemptContext newTaskAttemptContext(Configuration conf, final Progressable progressable) { - return new org.apache.hadoop.mapreduce.TaskAttemptContext(conf, new TaskAttemptID()) { - @Override - public void progress() { - progressable.progress(); - } - }; - } - - @Override - public TaskAttemptID newTaskAttemptID(JobID jobId, boolean isMap, int taskId, int id) { - return new TaskAttemptID(jobId.getJtIdentifier(), jobId.getId(), isMap, taskId, id); - } - - @Override - public org.apache.hadoop.mapreduce.JobContext newJobContext(Job job) { - return new org.apache.hadoop.mapreduce.JobContext(job.getConfiguration(), job.getJobID()); - } - - @Override - public void closeAllForUGI(UserGroupInformation ugi) { - // No such functionality in ancient hadoop - return; - } - - @Override - public boolean isLocalMode(Configuration conf) { - return "local".equals(getJobLauncherRpcAddress(conf)); - } - - @Override - public String getJobLauncherRpcAddress(Configuration conf) { - return conf.get("mapred.job.tracker"); - } - - @Override - public void setJobLauncherRpcAddress(Configuration conf, String val) { - conf.set("mapred.job.tracker", val); - } - - @Override - public String getJobLauncherHttpAddress(Configuration conf) { - return conf.get("mapred.job.tracker.http.address"); - } - - @Override - public boolean moveToAppropriateTrash(FileSystem fs, Path path, Configuration conf) - throws IOException { - // older versions of Hadoop don't have a Trash constructor based on the - // Path or FileSystem. So need to achieve this by creating a dummy conf. 
- // this needs to be filtered out based on version - - Configuration dupConf = new Configuration(conf); - FileSystem.setDefaultUri(dupConf, fs.getUri()); - Trash trash = new Trash(dupConf); - return trash.moveToTrash(path); - } - - @Override - public long getDefaultBlockSize(FileSystem fs, Path path) { - return fs.getDefaultBlockSize(); - } - - @Override - public short getDefaultReplication(FileSystem fs, Path path) { - return fs.getDefaultReplication(); - } - - @Override - public void refreshDefaultQueue(Configuration conf, String userName) { - // MR1 does not expose API required to set MR queue mapping for user - } - - @Override - public String getTokenFileLocEnvName() { - throw new UnsupportedOperationException( - "Kerberos not supported in current hadoop version"); - } - @Override - public HCatHadoopShims getHCatShim() { - throw new UnsupportedOperationException("HCatalog does not support Hadoop 0.20.x"); - } - @Override - public WebHCatJTShim getWebHCatShim(Configuration conf, UserGroupInformation ugi) throws IOException { - throw new UnsupportedOperationException("WebHCat does not support Hadoop 0.20.x"); - } - @Override - public FileSystem createProxyFileSystem(FileSystem fs, URI uri) { - return new ProxyFileSystem(fs, uri); - } - @Override - public Map getHadoopConfNames() { - Map ret = new HashMap(); - ret.put("HADOOPFS", "fs.default.name"); - ret.put("HADOOPMAPFILENAME", "map.input.file"); - ret.put("HADOOPMAPREDINPUTDIR", "mapred.input.dir"); - ret.put("HADOOPMAPREDINPUTDIRRECURSIVE", "mapred.input.dir.recursive"); - ret.put("MAPREDMAXSPLITSIZE", "mapred.max.split.size"); - ret.put("MAPREDMINSPLITSIZE", "mapred.min.split.size"); - ret.put("MAPREDMINSPLITSIZEPERRACK", "mapred.min.split.size.per.rack"); - ret.put("MAPREDMINSPLITSIZEPERNODE", "mapred.min.split.size.per.node"); - ret.put("HADOOPNUMREDUCERS", "mapred.reduce.tasks"); - ret.put("HADOOPJOBNAME", "mapred.job.name"); - ret.put("HADOOPSPECULATIVEEXECREDUCERS", "mapred.reduce.tasks.speculative.execution"); - ret.put("MAPREDSETUPCLEANUPNEEDED", "mapred.committer.job.setup.cleanup.needed"); - ret.put("MAPREDTASKCLEANUPNEEDED", "mapreduce.job.committer.task.cleanup.needed"); - return ret; - } - - @Override - public ZeroCopyReaderShim getZeroCopyReader(FSDataInputStream in, ByteBufferPoolShim pool) throws IOException { - /* not supported */ - return null; - } - - @Override - public DirectDecompressorShim getDirectDecompressor(DirectCompressionType codec) { - /* not supported */ - return null; - } - - @Override - public Configuration getConfiguration(org.apache.hadoop.mapreduce.JobContext context) { - return context.getConfiguration(); - } - - @Override - public FileSystem getNonCachedFileSystem(URI uri, Configuration conf) throws IOException { - boolean origDisableHDFSCache = - conf.getBoolean("fs." + uri.getScheme() + ".impl.disable.cache", false); - // hadoop-20 compatible flag. - conf.setBoolean("fs." + uri.getScheme() + ".impl.disable.cache", true); - FileSystem fs = FileSystem.get(uri, conf); - conf.setBoolean("fs." 
+ uri.getScheme() + ".impl.disable.cache", origDisableHDFSCache); - return fs; - } - - @Override - public void getMergedCredentials(JobConf jobConf) throws IOException { - throw new IOException("Merging of credentials not supported in this version of hadoop"); - } - - @Override - public void mergeCredentials(JobConf dest, JobConf src) throws IOException { - throw new IOException("Merging of credentials not supported in this version of hadoop"); - } - - @Override - public String getCounterGroupName(String group, String defaultValue) { - return defaultValue; - } - - protected void run(FsShell shell, String[] command) throws Exception { - LOG.debug(ArrayUtils.toString(command)); - shell.run(command); - } - - @Override - public void checkFileAccess(FileSystem fs, FileStatus stat, FsAction action) - throws IOException, AccessControlException, Exception { - DefaultFileAccess.checkFileAccess(fs, stat, action); - } - - @Override - public String getPassword(Configuration conf, String name) { - // No password API, just retrieve value from conf - return conf.get(name); - } - - @Override - public boolean supportStickyBit() { - return false; - } - - @Override - public boolean hasStickyBit(FsPermission permission) { - return false; // not supported - } - - @Override - public boolean supportTrashFeature() { - return false; - } - - @Override - public Path getCurrentTrashPath(Configuration conf, FileSystem fs) { - return null; - } - - @Override - public KerberosNameShim getKerberosNameShim(String name) throws IOException { - // Not supported - return null; - } - - @Override - public void setZookeeperClientKerberosJaasConfig(String principal, String keyTabFile) { - // Not supported - } -} diff --git a/shims/0.20/src/main/java/org/apache/hadoop/hive/shims/Jetty20Shims.java b/shims/0.20/src/main/java/org/apache/hadoop/hive/shims/Jetty20Shims.java deleted file mode 100644 index 13c6b31..0000000 --- a/shims/0.20/src/main/java/org/apache/hadoop/hive/shims/Jetty20Shims.java +++ /dev/null @@ -1,56 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.shims; - -import java.io.IOException; - -import org.mortbay.jetty.bio.SocketConnector; -import org.mortbay.jetty.handler.RequestLogHandler; -import org.mortbay.jetty.webapp.WebAppContext; - -/** - * Jetty20Shims. 
- * - */ -public class Jetty20Shims implements JettyShims { - public Server startServer(String listen, int port) throws IOException { - Server s = new Server(); - s.setupListenerHostPort(listen, port); - return s; - } - - private static class Server extends org.mortbay.jetty.Server implements JettyShims.Server { - public void addWar(String war, String contextPath) { - WebAppContext wac = new WebAppContext(); - wac.setContextPath(contextPath); - wac.setWar(war); - RequestLogHandler rlh = new RequestLogHandler(); - rlh.setHandler(wac); - this.addHandler(rlh); - } - - public void setupListenerHostPort(String listen, int port) - throws IOException { - - SocketConnector connector = new SocketConnector(); - connector.setPort(port); - connector.setHost(listen); - this.addConnector(connector); - } - } -} diff --git a/shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java b/shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java index 7e5b025..f7a741c 100644 --- a/shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java +++ b/shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java @@ -84,7 +84,6 @@ public String getTaskAttemptLogUrl(JobConf conf, @Override public JobTrackerState getJobTrackerState(ClusterStatus clusterStatus) throws Exception { - JobTrackerState state; switch (clusterStatus.getJobTrackerState()) { case INITIALIZING: return JobTrackerState.INITIALIZING; @@ -512,6 +511,11 @@ public Configuration getConfiguration(org.apache.hadoop.mapreduce.JobContext con } @Override + public JobConf getJobConf(org.apache.hadoop.mapred.JobContext context) { + return context.getJobConf(); + } + + @Override public FileSystem getNonCachedFileSystem(URI uri, Configuration conf) throws IOException { boolean origDisableHDFSCache = conf.getBoolean("fs." 
+ uri.getScheme() + ".impl.disable.cache", false); diff --git a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java index b17ff69..032a110 100644 --- a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java +++ b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java @@ -436,7 +436,7 @@ public TaskAttemptID createTaskAttemptID() { Reporter.class); construct.setAccessible(true); newContext = (org.apache.hadoop.mapred.TaskAttemptContext) construct.newInstance( - new JobConf(conf), taskId, (Reporter) progressable); + new JobConf(conf), taskId, progressable); } catch (Exception e) { throw new RuntimeException(e); } @@ -454,7 +454,7 @@ public JobContext createJobContext(Configuration conf, public org.apache.hadoop.mapred.JobContext createJobContext(org.apache.hadoop.mapred.JobConf conf, org.apache.hadoop.mapreduce.JobID jobId, Progressable progressable) { return new org.apache.hadoop.mapred.JobContextImpl( - new JobConf(conf), jobId, (org.apache.hadoop.mapred.Reporter) progressable); + new JobConf(conf), jobId, progressable); } @Override @@ -610,8 +610,8 @@ public void setFullFileStatus(Configuration conf, HdfsFileStatus sourceStatus, } public class Hadoop23FileStatus implements HdfsFileStatus { - private FileStatus fileStatus; - private AclStatus aclStatus; + private final FileStatus fileStatus; + private final AclStatus aclStatus; public Hadoop23FileStatus(FileStatus fileStatus, AclStatus aclStatus) { this.fileStatus = fileStatus; this.aclStatus = aclStatus; @@ -679,7 +679,7 @@ public ProxyFileSystem23(FileSystem fs, URI uri) { public RemoteIterator listLocatedStatus(final Path f) throws FileNotFoundException, IOException { return new RemoteIterator() { - private RemoteIterator stats = + private final RemoteIterator stats = ProxyFileSystem23.super.listLocatedStatus( ProxyFileSystem23.super.swizzleParamPath(f)); @@ -712,7 +712,6 @@ public void access(Path path, FsAction action) throws AccessControlException, accessMethod.invoke(fs, underlyingFsPath, action); } else { // If the FS has no access() method, we can try DefaultFileAccess .. 
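/*
 * The access() hunk above reflects that FileSystem.access(Path, FsAction)
 * only exists on newer Hadoop 2.x releases, so Hadoop23Shims resolves it
 * reflectively and falls back to a client-side permission check when it is
 * absent. A minimal sketch of that lookup pattern, assuming nothing beyond
 * java.lang.reflect; the local variable names are made up:
 *
 *   Method accessMethod;
 *   try {
 *     accessMethod = FileSystem.class.getMethod("access", Path.class, FsAction.class);
 *   } catch (NoSuchMethodException err) {
 *     accessMethod = null;  // older Hadoop: no server-side access check
 *   }
 *   if (accessMethod != null) {
 *     accessMethod.invoke(fs, path, action);  // NameNode evaluates permissions
 *   } else {
 *     DefaultFileAccess.checkFileAccess(fs, fileStatus, action);  // client-side
 *   }
 */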
- UserGroupInformation ugi = getUGIForConf(getConf()); DefaultFileAccess.checkFileAccess(fs, underlyingFsStatus, action); } } catch (AccessControlException err) { @@ -775,6 +774,11 @@ public Configuration getConfiguration(org.apache.hadoop.mapreduce.JobContext con } @Override + public JobConf getJobConf(org.apache.hadoop.mapred.JobContext context) { + return context.getJobConf(); + } + + @Override public FileSystem getNonCachedFileSystem(URI uri, Configuration conf) throws IOException { return FileSystem.newInstance(uri, conf); } @@ -901,28 +905,33 @@ public KerberosNameShim getKerberosNameShim(String name) throws IOException { */ public class KerberosNameShim implements HadoopShimsSecure.KerberosNameShim { - private KerberosName kerberosName; + private final KerberosName kerberosName; public KerberosNameShim(String name) { kerberosName = new KerberosName(name); } + @Override public String getDefaultRealm() { return kerberosName.getDefaultRealm(); } + @Override public String getServiceName() { return kerberosName.getServiceName(); } + @Override public String getHostName() { return kerberosName.getHostName(); } + @Override public String getRealm() { return kerberosName.getRealm(); } + @Override public String getShortName() throws IOException { return kerberosName.getShortName(); } diff --git a/shims/0.23/src/main/java/org/apache/hadoop/hive/thrift/HadoopThriftAuthBridge23.java b/shims/0.23/src/main/java/org/apache/hadoop/hive/thrift/HadoopThriftAuthBridge23.java index a4cd37d..5e21c9f 100644 --- a/shims/0.23/src/main/java/org/apache/hadoop/hive/thrift/HadoopThriftAuthBridge23.java +++ b/shims/0.23/src/main/java/org/apache/hadoop/hive/thrift/HadoopThriftAuthBridge23.java @@ -31,7 +31,7 @@ * * This is a 0.23/2.x specific implementation */ -public class HadoopThriftAuthBridge23 extends HadoopThriftAuthBridge20S { +public class HadoopThriftAuthBridge23 extends HadoopThriftAuthBridge { private static Field SASL_PROPS_FIELD; private static Class SASL_PROPERTIES_RESOLVER_CLASS; diff --git a/shims/aggregator/pom.xml b/shims/aggregator/pom.xml index 4dd3dd7..3772177 100644 --- a/shims/aggregator/pom.xml +++ b/shims/aggregator/pom.xml @@ -41,18 +41,6 @@ org.apache.hive.shims - hive-shims-0.20 - ${project.version} - runtime - - - org.apache.hive.shims - hive-shims-common-secure - ${project.version} - compile - - - org.apache.hive.shims hive-shims-0.20S ${project.version} runtime diff --git a/shims/common-secure/pom.xml b/shims/common-secure/pom.xml deleted file mode 100644 index 2c56f5d..0000000 --- a/shims/common-secure/pom.xml +++ /dev/null @@ -1,93 +0,0 @@ - - - - 4.0.0 - - org.apache.hive - hive - 0.15.0-SNAPSHOT - ../../pom.xml - - - org.apache.hive.shims - hive-shims-common-secure - jar - Hive Shims Secure Common - - - ../.. 
- - - - - - - org.apache.hive.shims - hive-shims-common - ${project.version} - - - - commons-codec - commons-codec - ${commons-codec.version} - - - commons-lang - commons-lang - ${commons-lang.version} - - - commons-logging - commons-logging - ${commons-logging.version} - - - org.apache.hadoop - hadoop-core - ${hadoop-20S.version} - true - - - org.apache.hadoop - hadoop-tools - ${hadoop-20S.version} - true - - - org.apache.thrift - libthrift - ${libthrift.version} - - - org.apache.curator - curator-framework - ${curator.version} - - - org.apache.zookeeper - zookeeper - ${zookeeper.version} - - - org.apache.hadoop - hadoop-core - - - - - diff --git a/shims/common-secure/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java b/shims/common-secure/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java deleted file mode 100644 index 19ba6a8..0000000 --- a/shims/common-secure/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java +++ /dev/null @@ -1,771 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */
-package org.apache.hadoop.hive.shims;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.File;
-import java.io.IOException;
-import java.lang.reflect.Constructor;
-import java.net.URI;
-import java.net.URISyntaxException;
-import java.security.AccessControlException;
-import java.security.PrivilegedExceptionAction;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-
-import javax.security.auth.login.AppConfigurationEntry;
-import javax.security.auth.login.AppConfigurationEntry.LoginModuleControlFlag;
-
-import org.apache.commons.lang.ArrayUtils;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.DefaultFileAccess;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.FsShell;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.PathFilter;
-import org.apache.hadoop.fs.permission.FsAction;
-import org.apache.hadoop.hive.io.HiveIOExceptionHandlerUtil;
-import org.apache.hadoop.hive.thrift.DelegationTokenIdentifier;
-import org.apache.hadoop.hive.thrift.DelegationTokenSelector;
-import org.apache.hadoop.http.HtmlQuoting;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.ClusterStatus;
-import org.apache.hadoop.mapred.FileInputFormat;
-import org.apache.hadoop.mapred.InputSplit;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.JobContext;
-import org.apache.hadoop.mapred.OutputCommitter;
-import org.apache.hadoop.mapred.RecordReader;
-import org.apache.hadoop.mapred.Reporter;
-import org.apache.hadoop.mapred.TaskAttemptContext;
-import org.apache.hadoop.mapred.lib.CombineFileInputFormat;
-import org.apache.hadoop.mapred.lib.CombineFileSplit;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.security.Credentials;
-import org.apache.hadoop.security.SecurityUtil;
-import org.apache.hadoop.security.UserGroupInformation;
-import org.apache.hadoop.security.authentication.util.KerberosUtil;
-import org.apache.hadoop.security.authorize.ProxyUsers;
-import org.apache.hadoop.security.token.Token;
-import org.apache.hadoop.security.token.TokenIdentifier;
-import org.apache.hadoop.security.token.TokenSelector;
-import org.apache.hadoop.tools.HadoopArchives;
-import org.apache.hadoop.util.Progressable;
-import org.apache.hadoop.util.ToolRunner;
-import org.apache.zookeeper.client.ZooKeeperSaslClient;
-
-import com.google.common.primitives.Longs;
-
-/**
- * Base implementation for shims against secure Hadoop 0.20.3/0.23.
- */
-public abstract class HadoopShimsSecure implements HadoopShims {
-
-  static final Log LOG = LogFactory.getLog(HadoopShimsSecure.class);
-
-  @Override
-  public String unquoteHtmlChars(String item) {
-    return HtmlQuoting.unquoteHtmlChars(item);
-  }
-
-  @Override
-  public HadoopShims.CombineFileInputFormatShim getCombineFileInputFormat() {
-    return new CombineFileInputFormatShim() {
-      @Override
-      public RecordReader getRecordReader(InputSplit split,
-          JobConf job, Reporter reporter) throws IOException {
-        throw new IOException("CombineFileInputFormat.getRecordReader not needed.");
-      }
-    };
-  }
-
-  public static class InputSplitShim extends CombineFileSplit implements HadoopShims.InputSplitShim {
-    long shrinkedLength;
-    boolean _isShrinked;
-    public InputSplitShim() {
-      super();
-      _isShrinked = false;
-    }
-
-    public InputSplitShim(JobConf conf, Path[] paths, long[] startOffsets,
-        long[] lengths, String[] locations) throws IOException {
-      super(conf, paths, startOffsets, lengths, dedup(locations));
-      _isShrinked = false;
-    }
-
-    @Override
-    public void shrinkSplit(long length) {
-      _isShrinked = true;
-      shrinkedLength = length;
-    }
-
-    public boolean isShrinked() {
-      return _isShrinked;
-    }
-
-    public long getShrinkedLength() {
-      return shrinkedLength;
-    }
-
-    @Override
-    public void readFields(DataInput in) throws IOException {
-      super.readFields(in);
-      _isShrinked = in.readBoolean();
-      if (_isShrinked) {
-        shrinkedLength = in.readLong();
-      }
-    }
-
-    @Override
-    public void write(DataOutput out) throws IOException {
-      super.write(out);
-      out.writeBoolean(_isShrinked);
-      if (_isShrinked) {
-        out.writeLong(shrinkedLength);
-      }
-    }
-  }
-
-  /* This class should be replaced with org.apache.hadoop.mapred.lib.CombineFileRecordReader class, once
-   * https://issues.apache.org/jira/browse/MAPREDUCE-955 is fixed. This code should be removed - it is a copy
-   * of org.apache.hadoop.mapred.lib.CombineFileRecordReader
-   */
-  public static class CombineFileRecordReader<K, V> implements RecordReader<K, V> {
-
-    static final Class[] constructorSignature = new Class[] {
-        InputSplit.class,
-        Configuration.class,
-        Reporter.class,
-        Integer.class
-        };
-
-    protected CombineFileSplit split;
-    protected JobConf jc;
-    protected Reporter reporter;
-    protected Class<RecordReader<K, V>> rrClass;
-    protected Constructor<RecordReader<K, V>> rrConstructor;
-    protected FileSystem fs;
-
-    protected int idx;
-    protected long progress;
-    protected RecordReader<K, V> curReader;
-    protected boolean isShrinked;
-    protected long shrinkedLength;
-
-    @Override
-    public boolean next(K key, V value) throws IOException {
-
-      while ((curReader == null)
-          || !doNextWithExceptionHandler((K) ((CombineHiveKey) key).getKey(),
-              value)) {
-        if (!initNextRecordReader(key)) {
-          return false;
-        }
-      }
-      return true;
-    }
-
-    @Override
-    public K createKey() {
-      K newKey = curReader.createKey();
-      return (K)(new CombineHiveKey(newKey));
-    }
-
-    @Override
-    public V createValue() {
-      return curReader.createValue();
-    }
-
-    /**
-     * Return the amount of data processed.
-     */
-    @Override
-    public long getPos() throws IOException {
-      return progress;
-    }
-
-    @Override
-    public void close() throws IOException {
-      if (curReader != null) {
-        curReader.close();
-        curReader = null;
-      }
-    }
-
-    /**
-     * Return progress based on the amount of data processed so far.
- */ - @Override - public float getProgress() throws IOException { - return Math.min(1.0f, progress / (float) (split.getLength())); - } - - /** - * A generic RecordReader that can hand out different recordReaders - * for each chunk in the CombineFileSplit. - */ - public CombineFileRecordReader(JobConf job, CombineFileSplit split, - Reporter reporter, - Class> rrClass) - throws IOException { - this.split = split; - this.jc = job; - this.rrClass = rrClass; - this.reporter = reporter; - this.idx = 0; - this.curReader = null; - this.progress = 0; - - isShrinked = false; - - assert (split instanceof InputSplitShim); - if (((InputSplitShim) split).isShrinked()) { - isShrinked = true; - shrinkedLength = ((InputSplitShim) split).getShrinkedLength(); - } - - try { - rrConstructor = rrClass.getDeclaredConstructor(constructorSignature); - rrConstructor.setAccessible(true); - } catch (Exception e) { - throw new RuntimeException(rrClass.getName() + - " does not have valid constructor", e); - } - initNextRecordReader(null); - } - - /** - * do next and handle exception inside it. - * @param key - * @param value - * @return - * @throws IOException - */ - private boolean doNextWithExceptionHandler(K key, V value) throws IOException { - try { - return curReader.next(key, value); - } catch (Exception e) { - return HiveIOExceptionHandlerUtil - .handleRecordReaderNextException(e, jc); - } - } - - /** - * Get the record reader for the next chunk in this CombineFileSplit. - */ - protected boolean initNextRecordReader(K key) throws IOException { - - if (curReader != null) { - curReader.close(); - curReader = null; - if (idx > 0) { - progress += split.getLength(idx - 1); // done processing so far - } - } - - // if all chunks have been processed, nothing more to do. - if (idx == split.getNumPaths() || (isShrinked && progress > shrinkedLength)) { - return false; - } - - // get a record reader for the idx-th chunk - try { - curReader = rrConstructor.newInstance(new Object[] - {split, jc, reporter, Integer.valueOf(idx)}); - - // change the key if need be - if (key != null) { - K newKey = curReader.createKey(); - ((CombineHiveKey)key).setKey(newKey); - } - - // setup some helper config variables. - jc.set("map.input.file", split.getPath(idx).toString()); - jc.setLong("map.input.start", split.getOffset(idx)); - jc.setLong("map.input.length", split.getLength(idx)); - } catch (Exception e) { - curReader = HiveIOExceptionHandlerUtil.handleRecordReaderCreationException( - e, jc); - } - idx++; - return true; - } - } - - public abstract static class CombineFileInputFormatShim extends - CombineFileInputFormat - implements HadoopShims.CombineFileInputFormatShim { - - @Override - public Path[] getInputPathsShim(JobConf conf) { - try { - return FileInputFormat.getInputPaths(conf); - } catch (Exception e) { - throw new RuntimeException(e); - } - } - - @Override - public void createPool(JobConf conf, PathFilter... 
filters) { - super.createPool(conf, filters); - } - - @Override - public InputSplitShim[] getSplits(JobConf job, int numSplits) throws IOException { - long minSize = job.getLong(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMINSPLITSIZE"), 0); - - // For backward compatibility, let the above parameter be used - if (job.getLong(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMINSPLITSIZEPERNODE"), 0) == 0) { - super.setMinSplitSizeNode(minSize); - } - - if (job.getLong(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMINSPLITSIZEPERRACK"), 0) == 0) { - super.setMinSplitSizeRack(minSize); - } - - if (job.getLong(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMAXSPLITSIZE"), 0) == 0) { - super.setMaxSplitSize(minSize); - } - - InputSplit[] splits = super.getSplits(job, numSplits); - - ArrayList inputSplitShims = new ArrayList(); - for (int pos = 0; pos < splits.length; pos++) { - CombineFileSplit split = (CombineFileSplit) splits[pos]; - Set dirIndices = getDirIndices(split.getPaths(), job); - if (dirIndices.size() != split.getPaths().length) { - List prunedPaths = prune(dirIndices, Arrays.asList(split.getPaths())); - List prunedStartOffsets = prune(dirIndices, Arrays.asList( - ArrayUtils.toObject(split.getStartOffsets()))); - List prunedLengths = prune(dirIndices, Arrays.asList( - ArrayUtils.toObject(split.getLengths()))); - inputSplitShims.add(new InputSplitShim(job, prunedPaths.toArray(new Path[prunedPaths.size()]), - Longs.toArray(prunedStartOffsets), - Longs.toArray(prunedLengths), split.getLocations())); - } - } - return inputSplitShims.toArray(new InputSplitShim[inputSplitShims.size()]); - } - - @Override - public InputSplitShim getInputSplitShim() throws IOException { - return new InputSplitShim(); - } - - @Override - public RecordReader getRecordReader(JobConf job, HadoopShims.InputSplitShim split, - Reporter reporter, - Class> rrClass) - throws IOException { - CombineFileSplit cfSplit = (CombineFileSplit) split; - return new CombineFileRecordReader(job, cfSplit, reporter, rrClass); - } - - } - - @Override - public String getInputFormatClassName() { - return "org.apache.hadoop.hive.ql.io.CombineHiveInputFormat"; - } - - String[] ret = new String[2]; - - @Override - public int createHadoopArchive(Configuration conf, Path sourceDir, Path destDir, - String archiveName) throws Exception { - - HadoopArchives har = new HadoopArchives(conf); - List args = new ArrayList(); - - args.add("-archiveName"); - args.add(archiveName); - args.add("-p"); - args.add(sourceDir.toString()); - args.add(destDir.toString()); - - return ToolRunner.run(har, args.toArray(new String[0])); - } - - /* - * This particular instance is for Hadoop 1.0 which creates an archive - * with only the relative path of the archived directory stored within - * the archive as compared to the full path in case of earlier versions. - * See this api in Hadoop20Shims for comparison. 
- */ - @Override - public URI getHarUri(URI original, URI base, URI originalBase) - throws URISyntaxException { - URI relative = originalBase.relativize(original); - if (relative.isAbsolute()) { - throw new URISyntaxException("Couldn't create URI for location.", - "Relative: " + relative + " Base: " - + base + " OriginalBase: " + originalBase); - } - - return base.resolve(relative); - } - - public static class NullOutputCommitter extends OutputCommitter { - @Override - public void setupJob(JobContext jobContext) { } - @Override - public void cleanupJob(JobContext jobContext) { } - - @Override - public void setupTask(TaskAttemptContext taskContext) { } - @Override - public boolean needsTaskCommit(TaskAttemptContext taskContext) { - return false; - } - @Override - public void commitTask(TaskAttemptContext taskContext) { } - @Override - public void abortTask(TaskAttemptContext taskContext) { } - } - - @Override - public void prepareJobOutput(JobConf conf) { - conf.setOutputCommitter(NullOutputCommitter.class); - - // option to bypass job setup and cleanup was introduced in hadoop-21 (MAPREDUCE-463) - // but can be backported. So we disable setup/cleanup in all versions >= 0.19 - conf.setBoolean(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDSETUPCLEANUPNEEDED"), false); - - // option to bypass task cleanup task was introduced in hadoop-23 (MAPREDUCE-2206) - // but can be backported. So we disable setup/cleanup in all versions >= 0.19 - conf.setBoolean(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDTASKCLEANUPNEEDED"), false); - } - - @Override - public UserGroupInformation getUGIForConf(Configuration conf) throws IOException { - String doAs = System.getenv("HADOOP_USER_NAME"); - if(doAs != null && doAs.length() > 0) { - /* - * this allows doAs (proxy user) to be passed along across process boundary where - * delegation tokens are not supported. For example, a DDL stmt via WebHCat with - * a doAs parameter, forks to 'hcat' which needs to start a Session that - * proxies the end user - */ - return UserGroupInformation.createProxyUser(doAs, UserGroupInformation.getLoginUser()); - } - return UserGroupInformation.getCurrentUser(); - } - - @Override - public boolean isSecureShimImpl() { - return true; - } - - @Override - public String getShortUserName(UserGroupInformation ugi) { - return ugi.getShortUserName(); - } - - @Override - public String getTokenStrForm(String tokenSignature) throws IOException { - UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); - TokenSelector tokenSelector = new DelegationTokenSelector(); - - Token token = tokenSelector.selectToken( - tokenSignature == null ? new Text() : new Text(tokenSignature), ugi.getTokens()); - return token != null ? token.encodeToUrlString() : null; - } - - /** - * Create a delegation token object for the given token string and service. - * Add the token to given UGI - */ - @Override - public void setTokenStr(UserGroupInformation ugi, String tokenStr, String tokenService) throws IOException { - Token delegationToken = createToken(tokenStr, tokenService); - ugi.addToken(delegationToken); - } - - /** - * Add a given service to delegation token string. 
- */ - @Override - public String addServiceToToken(String tokenStr, String tokenService) - throws IOException { - Token delegationToken = createToken(tokenStr, tokenService); - return delegationToken.encodeToUrlString(); - } - - /** - * Create a new token using the given string and service - * @param tokenStr - * @param tokenService - * @return - * @throws IOException - */ - private Token createToken(String tokenStr, String tokenService) - throws IOException { - Token delegationToken = new Token(); - delegationToken.decodeFromUrlString(tokenStr); - delegationToken.setService(new Text(tokenService)); - return delegationToken; - } - - @Override - public T doAs(UserGroupInformation ugi, PrivilegedExceptionAction pvea) throws IOException, InterruptedException { - return ugi.doAs(pvea); - } - - @Override - public Path createDelegationTokenFile(Configuration conf) throws IOException { - - //get delegation token for user - String uname = UserGroupInformation.getLoginUser().getShortUserName(); - FileSystem fs = FileSystem.get(conf); - Token fsToken = fs.getDelegationToken(uname); - - File t = File.createTempFile("hive_hadoop_delegation_token", null); - Path tokenPath = new Path(t.toURI()); - - //write credential with token to file - Credentials cred = new Credentials(); - cred.addToken(fsToken.getService(), fsToken); - cred.writeTokenStorageFile(tokenPath, conf); - - return tokenPath; - } - - @Override - public UserGroupInformation createProxyUser(String userName) throws IOException { - return UserGroupInformation.createProxyUser( - userName, UserGroupInformation.getLoginUser()); - } - - @Override - public void authorizeProxyAccess(String proxyUser, UserGroupInformation realUserUgi, - String ipAddress, Configuration conf) throws IOException { - ProxyUsers.refreshSuperUserGroupsConfiguration(conf); - ProxyUsers.authorize(UserGroupInformation.createProxyUser(proxyUser, realUserUgi), - ipAddress, conf); - } - - @Override - public boolean isSecurityEnabled() { - return UserGroupInformation.isSecurityEnabled(); - } - - @Override - public UserGroupInformation createRemoteUser(String userName, List groupNames) { - return UserGroupInformation.createRemoteUser(userName); - } - - @Override - public void closeAllForUGI(UserGroupInformation ugi) { - try { - FileSystem.closeAllForUGI(ugi); - } catch (IOException e) { - LOG.error("Could not clean up file-system handles for UGI: " + ugi, e); - } - } - - @Override - public void loginUserFromKeytab(String principal, String keytabFile) throws IOException { - String hostPrincipal = SecurityUtil.getServerPrincipal(principal, "0.0.0.0"); - UserGroupInformation.loginUserFromKeytab(hostPrincipal, keytabFile); - } - - @Override - public UserGroupInformation loginUserFromKeytabAndReturnUGI( - String principal, String keytabFile) throws IOException { - String hostPrincipal = SecurityUtil.getServerPrincipal(principal, "0.0.0.0"); - return UserGroupInformation.loginUserFromKeytabAndReturnUGI(hostPrincipal, keytabFile); - } - - /** - * Convert Kerberos principal name pattern to valid Kerberos principal names. 
- * @param principal (principal name pattern) - * @return - * @throws IOException - */ - @Override - public String getResolvedPrincipal(String principal) throws IOException { - return SecurityUtil.getServerPrincipal(principal, "0.0.0.0"); - } - - @Override - public String getTokenFileLocEnvName() { - return UserGroupInformation.HADOOP_TOKEN_FILE_LOCATION; - } - - @Override - public void reLoginUserFromKeytab() throws IOException{ - UserGroupInformation ugi = UserGroupInformation.getLoginUser(); - //checkTGT calls ugi.relogin only after checking if it is close to tgt expiry - //hadoop relogin is actually done only every x minutes (x=10 in hadoop 1.x) - if(ugi.isFromKeytab()){ - ugi.checkTGTAndReloginFromKeytab(); - } - } - - @Override - public boolean isLoginKeytabBased() throws IOException { - return UserGroupInformation.isLoginKeytabBased(); - } - - @Override - abstract public JobTrackerState getJobTrackerState(ClusterStatus clusterStatus) throws Exception; - - @Override - abstract public org.apache.hadoop.mapreduce.TaskAttemptContext newTaskAttemptContext( - Configuration conf, final Progressable progressable); - - @Override - abstract public org.apache.hadoop.mapreduce.JobContext newJobContext(Job job); - - @Override - abstract public boolean isLocalMode(Configuration conf); - - @Override - abstract public void setJobLauncherRpcAddress(Configuration conf, String val); - - @Override - abstract public String getJobLauncherHttpAddress(Configuration conf); - - @Override - abstract public String getJobLauncherRpcAddress(Configuration conf); - - @Override - abstract public short getDefaultReplication(FileSystem fs, Path path); - - @Override - abstract public long getDefaultBlockSize(FileSystem fs, Path path); - - @Override - abstract public boolean moveToAppropriateTrash(FileSystem fs, Path path, Configuration conf) - throws IOException; - - @Override - abstract public FileSystem createProxyFileSystem(FileSystem fs, URI uri); - - @Override - abstract public FileSystem getNonCachedFileSystem(URI uri, Configuration conf) throws IOException; - - protected void run(FsShell shell, String[] command) throws Exception { - LOG.debug(ArrayUtils.toString(command)); - int retval = shell.run(command); - LOG.debug("Return value is :" + retval); - } - - /** - * CombineFileInputFormat sometimes returns directories as splits, need to prune them. 
- */ - private static Set getDirIndices(Path[] paths, JobConf conf) throws IOException { - Set result = new HashSet(); - for (int i = 0; i < paths.length; i++) { - FileSystem fs = paths[i].getFileSystem(conf); - if (!fs.isFile(paths[i])) { - result.add(i); - } - } - return result; - } - - private static List prune(Set indicesToPrune, List elms) { - List result = new ArrayList(); - int i = 0; - for (K elm : elms) { - if (indicesToPrune.contains(i)) { - continue; - } - result.add(elm); - i++; - } - return result; - } - - private static String[] dedup(String[] locations) throws IOException { - Set dedup = new HashSet(); - Collections.addAll(dedup, locations); - return dedup.toArray(new String[dedup.size()]); - } - - @Override - public void checkFileAccess(FileSystem fs, FileStatus stat, FsAction action) - throws IOException, AccessControlException, Exception { - DefaultFileAccess.checkFileAccess(fs, stat, action); - } - - @Override - public void setZookeeperClientKerberosJaasConfig(String principal, String keyTabFile) throws IOException { - // ZooKeeper property name to pick the correct JAAS conf section - final String SASL_LOGIN_CONTEXT_NAME = "HiveZooKeeperClient"; - System.setProperty(ZooKeeperSaslClient.LOGIN_CONTEXT_NAME_KEY, SASL_LOGIN_CONTEXT_NAME); - - principal = getResolvedPrincipal(principal); - JaasConfiguration jaasConf = new JaasConfiguration(SASL_LOGIN_CONTEXT_NAME, principal, keyTabFile); - - // Install the Configuration in the runtime. - javax.security.auth.login.Configuration.setConfiguration(jaasConf); - } - - /** - * A JAAS configuration for ZooKeeper clients intended to use for SASL - * Kerberos. - */ - private static class JaasConfiguration extends javax.security.auth.login.Configuration { - // Current installed Configuration - private final javax.security.auth.login.Configuration baseConfig = javax.security.auth.login.Configuration - .getConfiguration(); - private final String loginContextName; - private final String principal; - private final String keyTabFile; - - public JaasConfiguration(String hiveLoginContextName, String principal, String keyTabFile) { - this.loginContextName = hiveLoginContextName; - this.principal = principal; - this.keyTabFile = keyTabFile; - } - - @Override - public AppConfigurationEntry[] getAppConfigurationEntry(String appName) { - if (loginContextName.equals(appName)) { - Map krbOptions = new HashMap(); - krbOptions.put("doNotPrompt", "true"); - krbOptions.put("storeKey", "true"); - krbOptions.put("useKeyTab", "true"); - krbOptions.put("principal", principal); - krbOptions.put("keyTab", keyTabFile); - krbOptions.put("refreshKrb5Config", "true"); - AppConfigurationEntry hiveZooKeeperClientEntry = new AppConfigurationEntry( - KerberosUtil.getKrb5LoginModuleName(), LoginModuleControlFlag.REQUIRED, krbOptions); - return new AppConfigurationEntry[] { hiveZooKeeperClientEntry }; - } - // Try the base config - if (baseConfig != null) { - return baseConfig.getAppConfigurationEntry(appName); - } - return null; - } - } - -} diff --git a/shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/DBTokenStore.java b/shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/DBTokenStore.java deleted file mode 100644 index d86c5cb..0000000 --- a/shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/DBTokenStore.java +++ /dev/null @@ -1,152 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.thrift; - -import java.io.IOException; -import java.lang.reflect.InvocationTargetException; -import java.util.ArrayList; -import java.util.List; - -import org.apache.commons.codec.binary.Base64; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge.Server.ServerMode; -import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager.DelegationTokenInformation; -import org.apache.hadoop.security.token.delegation.HiveDelegationTokenSupport; - -public class DBTokenStore implements DelegationTokenStore { - - - @Override - public int addMasterKey(String s) throws TokenStoreException { - return (Integer)invokeOnRawStore("addMasterKey", new Object[]{s},String.class); - } - - @Override - public void updateMasterKey(int keySeq, String s) throws TokenStoreException { - invokeOnRawStore("updateMasterKey", new Object[] {Integer.valueOf(keySeq), s}, - Integer.class, String.class); - } - - @Override - public boolean removeMasterKey(int keySeq) { - return (Boolean)invokeOnRawStore("removeMasterKey", new Object[] {Integer.valueOf(keySeq)}, - Integer.class); - } - - @Override - public String[] getMasterKeys() throws TokenStoreException { - return (String[])invokeOnRawStore("getMasterKeys", new Object[0]); - } - - @Override - public boolean addToken(DelegationTokenIdentifier tokenIdentifier, - DelegationTokenInformation token) throws TokenStoreException { - - try { - String identifier = TokenStoreDelegationTokenSecretManager.encodeWritable(tokenIdentifier); - String tokenStr = Base64.encodeBase64URLSafeString( - HiveDelegationTokenSupport.encodeDelegationTokenInformation(token)); - return (Boolean)invokeOnRawStore("addToken", new Object[] {identifier, tokenStr}, - String.class, String.class); - } catch (IOException e) { - throw new TokenStoreException(e); - } - } - - @Override - public DelegationTokenInformation getToken(DelegationTokenIdentifier tokenIdentifier) - throws TokenStoreException { - try { - String tokenStr = (String)invokeOnRawStore("getToken", new Object[] { - TokenStoreDelegationTokenSecretManager.encodeWritable(tokenIdentifier)}, String.class); - return (null == tokenStr) ? 
null : HiveDelegationTokenSupport.decodeDelegationTokenInformation(Base64.decodeBase64(tokenStr)); - } catch (IOException e) { - throw new TokenStoreException(e); - } - } - - @Override - public boolean removeToken(DelegationTokenIdentifier tokenIdentifier) throws TokenStoreException{ - try { - return (Boolean)invokeOnRawStore("removeToken", new Object[] { - TokenStoreDelegationTokenSecretManager.encodeWritable(tokenIdentifier)}, String.class); - } catch (IOException e) { - throw new TokenStoreException(e); - } - } - - @Override - public List getAllDelegationTokenIdentifiers() throws TokenStoreException{ - - List tokenIdents = (List)invokeOnRawStore("getAllTokenIdentifiers", new Object[0]); - List delTokenIdents = new ArrayList(tokenIdents.size()); - - for (String tokenIdent : tokenIdents) { - DelegationTokenIdentifier delToken = new DelegationTokenIdentifier(); - try { - TokenStoreDelegationTokenSecretManager.decodeWritable(delToken, tokenIdent); - } catch (IOException e) { - throw new TokenStoreException(e); - } - delTokenIdents.add(delToken); - } - return delTokenIdents; - } - - private Object rawStore; - - @Override - public void init(Object rawStore, ServerMode smode) throws TokenStoreException { - this.rawStore = rawStore; - } - - private Object invokeOnRawStore(String methName, Object[] params, Class ... paramTypes) - throws TokenStoreException{ - - try { - return rawStore.getClass().getMethod(methName, paramTypes).invoke(rawStore, params); - } catch (IllegalArgumentException e) { - throw new TokenStoreException(e); - } catch (SecurityException e) { - throw new TokenStoreException(e); - } catch (IllegalAccessException e) { - throw new TokenStoreException(e); - } catch (InvocationTargetException e) { - throw new TokenStoreException(e.getCause()); - } catch (NoSuchMethodException e) { - throw new TokenStoreException(e); - } - } - - @Override - public void setConf(Configuration conf) { - // No-op - } - - @Override - public Configuration getConf() { - return null; - } - - @Override - public void close() throws IOException { - // No-op. - } - -} diff --git a/shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenIdentifier.java b/shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenIdentifier.java deleted file mode 100644 index 4ca3c0b..0000000 --- a/shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenIdentifier.java +++ /dev/null @@ -1,52 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.thrift; - -import org.apache.hadoop.io.Text; -import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenIdentifier; - -/** - * A delegation token identifier that is specific to Hive. 
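A hedged sketch of how such an identifier round-trips through its Writable form, which is how the secret manager rebuilds identifiers from Token.getIdentifier() bytes (assumes the sketch lives in the same package as DelegationTokenIdentifier):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

// Sketch: serialize an identifier and read it back; the kind survives the
// round trip, which is what lets a token be matched to HIVE_DELEGATION_TOKEN.
final class IdentifierRoundTripSketch {
  static DelegationTokenIdentifier roundTrip(DelegationTokenIdentifier in) throws IOException {
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    in.write(new DataOutputStream(bos));
    DelegationTokenIdentifier out = new DelegationTokenIdentifier();
    out.readFields(new DataInputStream(new ByteArrayInputStream(bos.toByteArray())));
    assert DelegationTokenIdentifier.HIVE_DELEGATION_KIND.equals(out.getKind());
    return out;
  }
}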
- */ -public class DelegationTokenIdentifier - extends AbstractDelegationTokenIdentifier { - public static final Text HIVE_DELEGATION_KIND = new Text("HIVE_DELEGATION_TOKEN"); - - /** - * Create an empty delegation token identifier for reading into. - */ - public DelegationTokenIdentifier() { - } - - /** - * Create a new delegation token identifier - * @param owner the effective username of the token owner - * @param renewer the username of the renewer - * @param realUser the real username of the token owner - */ - public DelegationTokenIdentifier(Text owner, Text renewer, Text realUser) { - super(owner, renewer, realUser); - } - - @Override - public Text getKind() { - return HIVE_DELEGATION_KIND; - } - -} diff --git a/shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenSecretManager.java b/shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenSecretManager.java deleted file mode 100644 index 19d1fbf..0000000 --- a/shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenSecretManager.java +++ /dev/null @@ -1,100 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.thrift; - -import java.io.ByteArrayInputStream; -import java.io.DataInputStream; -import java.io.IOException; - -import org.apache.hadoop.io.Text; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hadoop.security.token.Token; -import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager; - -/** - * A Hive specific delegation token secret manager. - * The secret manager is responsible for generating and accepting the password - * for each token. - */ -public class DelegationTokenSecretManager - extends AbstractDelegationTokenSecretManager { - - /** - * Create a secret manager - * @param delegationKeyUpdateInterval the number of seconds for rolling new - * secret keys. 
- * @param delegationTokenMaxLifetime the maximum lifetime of the delegation - * tokens - * @param delegationTokenRenewInterval how often the tokens must be renewed - * @param delegationTokenRemoverScanInterval how often the tokens are scanned - * for expired tokens - */ - public DelegationTokenSecretManager(long delegationKeyUpdateInterval, - long delegationTokenMaxLifetime, - long delegationTokenRenewInterval, - long delegationTokenRemoverScanInterval) { - super(delegationKeyUpdateInterval, delegationTokenMaxLifetime, - delegationTokenRenewInterval, delegationTokenRemoverScanInterval); - } - - @Override - public DelegationTokenIdentifier createIdentifier() { - return new DelegationTokenIdentifier(); - } - - public synchronized void cancelDelegationToken(String tokenStrForm) throws IOException { - Token t= new Token(); - t.decodeFromUrlString(tokenStrForm); - String user = UserGroupInformation.getCurrentUser().getUserName(); - cancelToken(t, user); - } - - public synchronized long renewDelegationToken(String tokenStrForm) throws IOException { - Token t= new Token(); - t.decodeFromUrlString(tokenStrForm); - String user = UserGroupInformation.getCurrentUser().getUserName(); - return renewToken(t, user); - } - - public synchronized String getDelegationToken(String renewer) throws IOException { - UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); - Text owner = new Text(ugi.getUserName()); - Text realUser = null; - if (ugi.getRealUser() != null) { - realUser = new Text(ugi.getRealUser().getUserName()); - } - DelegationTokenIdentifier ident = - new DelegationTokenIdentifier(owner, new Text(renewer), realUser); - Token t = new Token( - ident, this); - return t.encodeToUrlString(); - } - - public String getUserFromToken(String tokenStr) throws IOException { - Token delegationToken = new Token(); - delegationToken.decodeFromUrlString(tokenStr); - - ByteArrayInputStream buf = new ByteArrayInputStream(delegationToken.getIdentifier()); - DataInputStream in = new DataInputStream(buf); - DelegationTokenIdentifier id = createIdentifier(); - id.readFields(in); - return id.getUser().getShortUserName(); - } -} - diff --git a/shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenSelector.java b/shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenSelector.java deleted file mode 100644 index f6e2420..0000000 --- a/shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenSelector.java +++ /dev/null @@ -1,33 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.hive.thrift; - -import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSelector; - -/** - * A delegation token that is specialized for Hive - */ - -public class DelegationTokenSelector - extends AbstractDelegationTokenSelector{ - - public DelegationTokenSelector() { - super(DelegationTokenIdentifier.HIVE_DELEGATION_KIND); - } -} diff --git a/shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenStore.java b/shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenStore.java deleted file mode 100644 index 867b4ed..0000000 --- a/shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenStore.java +++ /dev/null @@ -1,118 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.thrift; - -import java.io.Closeable; -import java.util.List; - -import org.apache.hadoop.conf.Configurable; -import org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge.Server.ServerMode; -import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager.DelegationTokenInformation; - -/** - * Interface for pluggable token store that can be implemented with shared external - * storage for load balancing and high availability (for example using ZooKeeper). - * Internal, store specific errors are translated into {@link TokenStoreException}. - */ -public interface DelegationTokenStore extends Configurable, Closeable { - - /** - * Exception for internal token store errors that typically cannot be handled by the caller. - */ - public static class TokenStoreException extends RuntimeException { - private static final long serialVersionUID = -8693819817623074083L; - - public TokenStoreException(Throwable cause) { - super(cause); - } - - public TokenStoreException(String message, Throwable cause) { - super(message, cause); - } - } - - /** - * Add new master key. The token store assigns and returns the sequence number. - * Caller needs to use the identifier to update the key (since it is embedded in the key). - * - * @param s - * @return sequence number for new key - */ - int addMasterKey(String s) throws TokenStoreException; - - /** - * Update master key (for expiration and setting store assigned sequence within key) - * @param keySeq - * @param s - * @throws TokenStoreException - */ - void updateMasterKey(int keySeq, String s) throws TokenStoreException; - - /** - * Remove key for given id. - * @param keySeq - * @return false if key no longer present, true otherwise. - */ - boolean removeMasterKey(int keySeq); - - /** - * Return all master keys. - * @return - * @throws TokenStoreException - */ - String[] getMasterKeys() throws TokenStoreException; - - /** - * Add token. 
If identifier is already present, token won't be added. - * @param tokenIdentifier - * @param token - * @return true if token was added, false for existing identifier - */ - boolean addToken(DelegationTokenIdentifier tokenIdentifier, - DelegationTokenInformation token) throws TokenStoreException; - - /** - * Get token. Returns null if the token does not exist. - * @param tokenIdentifier - * @return - */ - DelegationTokenInformation getToken(DelegationTokenIdentifier tokenIdentifier) - throws TokenStoreException; - - /** - * Remove token. Return value can be used by caller to detect concurrency. - * @param tokenIdentifier - * @return true if token was removed, false if it was already removed. - * @throws TokenStoreException - */ - boolean removeToken(DelegationTokenIdentifier tokenIdentifier) throws TokenStoreException; - - /** - * List of all token identifiers in the store. This is used to remove expired tokens - * and a potential scalability improvement would be to partition by master key id - * @return - */ - List getAllDelegationTokenIdentifiers() throws TokenStoreException; - - /** - * @param hmsHandler ObjectStore used by DBTokenStore - * @param smode Indicate whether this is a metastore or hiveserver2 token store - */ - void init(Object hmsHandler, ServerMode smode); - -} diff --git a/shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/HadoopThriftAuthBridge20S.java b/shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/HadoopThriftAuthBridge20S.java deleted file mode 100644 index 624ac6b..0000000 --- a/shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/HadoopThriftAuthBridge20S.java +++ /dev/null @@ -1,731 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hadoop.hive.thrift; - -import static org.apache.hadoop.fs.CommonConfigurationKeys.HADOOP_SECURITY_AUTHENTICATION; - -import java.io.IOException; -import java.net.InetAddress; -import java.net.Socket; -import java.security.PrivilegedAction; -import java.security.PrivilegedExceptionAction; -import java.util.Locale; -import java.util.Map; - -import javax.security.auth.callback.Callback; -import javax.security.auth.callback.CallbackHandler; -import javax.security.auth.callback.NameCallback; -import javax.security.auth.callback.PasswordCallback; -import javax.security.auth.callback.UnsupportedCallbackException; -import javax.security.sasl.AuthorizeCallback; -import javax.security.sasl.RealmCallback; -import javax.security.sasl.RealmChoiceCallback; -import javax.security.sasl.SaslException; -import javax.security.sasl.SaslServer; - -import org.apache.commons.codec.binary.Base64; -import org.apache.commons.lang.StringUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.hive.shims.ShimLoader; -import org.apache.hadoop.hive.thrift.client.TUGIAssumingTransport; -import org.apache.hadoop.security.SaslRpcServer; -import org.apache.hadoop.security.SaslRpcServer.AuthMethod; -import org.apache.hadoop.security.SecurityUtil; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod; -import org.apache.hadoop.security.authorize.AuthorizationException; -import org.apache.hadoop.security.authorize.ProxyUsers; -import org.apache.hadoop.security.token.SecretManager.InvalidToken; -import org.apache.hadoop.security.token.Token; -import org.apache.hadoop.security.token.TokenIdentifier; -import org.apache.hadoop.util.ReflectionUtils; -import org.apache.thrift.TException; -import org.apache.thrift.TProcessor; -import org.apache.thrift.protocol.TProtocol; -import org.apache.thrift.transport.TSaslClientTransport; -import org.apache.thrift.transport.TSaslServerTransport; -import org.apache.thrift.transport.TSocket; -import org.apache.thrift.transport.TTransport; -import org.apache.thrift.transport.TTransportException; -import org.apache.thrift.transport.TTransportFactory; - -/** - * Functions that bridge Thrift's SASL transports to Hadoop's - * SASL callback handlers and authentication classes. 
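For orientation, a hedged sketch of how a server might consume this bridge; the keytab path, principal, and port are placeholders, saslProps and myProcessor come from the caller, and only createServer/createTransportFactory/wrapProcessor are taken from the deleted class:

import java.util.Map;
import org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge;
import org.apache.thrift.TProcessor;
import org.apache.thrift.server.TServer;
import org.apache.thrift.server.TThreadPoolServer;
import org.apache.thrift.transport.TServerSocket;
import org.apache.thrift.transport.TTransportFactory;

// Sketch: Kerberos-authenticated clients are accepted via the SASL transport
// factory, and each RPC is then processed as the authenticated caller.
final class SecureServerSketch {
  static TServer build(HadoopThriftAuthBridge bridge, Map<String, String> saslProps,
      TProcessor myProcessor) throws Exception {
    HadoopThriftAuthBridge.Server authBridge =
        bridge.createServer("/path/to/hive.keytab", "hive/_HOST@EXAMPLE.COM");
    TTransportFactory transFactory = authBridge.createTransportFactory(saslProps);
    TProcessor processor = authBridge.wrapProcessor(myProcessor);
    return new TThreadPoolServer(new TThreadPoolServer.Args(new TServerSocket(9083))
        .transportFactory(transFactory)
        .processor(processor));
  }
}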
- */ -public class HadoopThriftAuthBridge20S extends HadoopThriftAuthBridge { - static final Log LOG = LogFactory.getLog(HadoopThriftAuthBridge.class); - - @Override - public Client createClient() { - return new Client(); - } - - @Override - public Client createClientWithConf(String authMethod) { - UserGroupInformation ugi; - try { - ugi = UserGroupInformation.getLoginUser(); - } catch(IOException e) { - throw new IllegalStateException("Unable to get current login user: " + e, e); - } - if (loginUserHasCurrentAuthMethod(ugi, authMethod)) { - LOG.debug("Not setting UGI conf as passed-in authMethod of " + authMethod + " = current."); - return new Client(); - } else { - LOG.debug("Setting UGI conf as passed-in authMethod of " + authMethod + " != current."); - Configuration conf = new Configuration(); - conf.set(HADOOP_SECURITY_AUTHENTICATION, authMethod); - UserGroupInformation.setConfiguration(conf); - return new Client(); - } - } - - @Override - public Server createServer(String keytabFile, String principalConf) throws TTransportException { - return new Server(keytabFile, principalConf); - } - - @Override - public String getServerPrincipal(String principalConfig, String host) - throws IOException { - String serverPrincipal = SecurityUtil.getServerPrincipal(principalConfig, host); - String names[] = SaslRpcServer.splitKerberosName(serverPrincipal); - if (names.length != 3) { - throw new IOException( - "Kerberos principal name does NOT have the expected hostname part: " - + serverPrincipal); - } - return serverPrincipal; - } - - @Override - public UserGroupInformation getCurrentUGIWithConf(String authMethod) - throws IOException { - UserGroupInformation ugi; - try { - ugi = UserGroupInformation.getCurrentUser(); - } catch(IOException e) { - throw new IllegalStateException("Unable to get current user: " + e, e); - } - if (loginUserHasCurrentAuthMethod(ugi, authMethod)) { - LOG.debug("Not setting UGI conf as passed-in authMethod of " + authMethod + " = current."); - return ugi; - } else { - LOG.debug("Setting UGI conf as passed-in authMethod of " + authMethod + " != current."); - Configuration conf = new Configuration(); - conf.set(HADOOP_SECURITY_AUTHENTICATION, authMethod); - UserGroupInformation.setConfiguration(conf); - return UserGroupInformation.getCurrentUser(); - } - } - - /** - * Return true if the current login user is already using the given authMethod. - * - * Used above to ensure we do not create a new Configuration object and as such - * lose other settings such as the cluster to which the JVM is connected. 
Required - * for oozie since it does not have a core-site.xml see HIVE-7682 - */ - private boolean loginUserHasCurrentAuthMethod(UserGroupInformation ugi, String sAuthMethod) { - AuthenticationMethod authMethod; - try { - // based on SecurityUtil.getAuthenticationMethod() - authMethod = Enum.valueOf(AuthenticationMethod.class, sAuthMethod.toUpperCase(Locale.ENGLISH)); - } catch (IllegalArgumentException iae) { - throw new IllegalArgumentException("Invalid attribute value for " + - HADOOP_SECURITY_AUTHENTICATION + " of " + sAuthMethod, iae); - } - LOG.debug("Current authMethod = " + ugi.getAuthenticationMethod()); - return ugi.getAuthenticationMethod().equals(authMethod); - } - - - /** - * Read and return Hadoop SASL configuration which can be configured using - * "hadoop.rpc.protection" - * @param conf - * @return Hadoop SASL configuration - */ - @Override - public Map getHadoopSaslProperties(Configuration conf) { - // Initialize the SaslRpcServer to ensure QOP parameters are read from conf - SaslRpcServer.init(conf); - return SaslRpcServer.SASL_PROPS; - } - - public static class Client extends HadoopThriftAuthBridge.Client { - /** - * Create a client-side SASL transport that wraps an underlying transport. - * - * @param method The authentication method to use. Currently only KERBEROS is - * supported. - * @param serverPrincipal The Kerberos principal of the target server. - * @param underlyingTransport The underlying transport mechanism, usually a TSocket. - * @param saslProps the sasl properties to create the client with - */ - - @Override - public TTransport createClientTransport( - String principalConfig, String host, - String methodStr, String tokenStrForm, TTransport underlyingTransport, - Map saslProps) throws IOException { - AuthMethod method = AuthMethod.valueOf(AuthMethod.class, methodStr); - - TTransport saslTransport = null; - switch (method) { - case DIGEST: - Token t= new Token(); - t.decodeFromUrlString(tokenStrForm); - saslTransport = new TSaslClientTransport( - method.getMechanismName(), - null, - null, SaslRpcServer.SASL_DEFAULT_REALM, - saslProps, new SaslClientCallbackHandler(t), - underlyingTransport); - return new TUGIAssumingTransport(saslTransport, UserGroupInformation.getCurrentUser()); - - case KERBEROS: - String serverPrincipal = SecurityUtil.getServerPrincipal(principalConfig, host); - String names[] = SaslRpcServer.splitKerberosName(serverPrincipal); - if (names.length != 3) { - throw new IOException( - "Kerberos principal name does NOT have the expected hostname part: " - + serverPrincipal); - } - try { - saslTransport = new TSaslClientTransport( - method.getMechanismName(), - null, - names[0], names[1], - saslProps, null, - underlyingTransport); - return new TUGIAssumingTransport(saslTransport, UserGroupInformation.getCurrentUser()); - } catch (SaslException se) { - throw new IOException("Could not instantiate SASL transport", se); - } - - default: - throw new IOException("Unsupported authentication method: " + method); - } - } - private static class SaslClientCallbackHandler implements CallbackHandler { - private final String userName; - private final char[] userPassword; - - public SaslClientCallbackHandler(Token token) { - this.userName = encodeIdentifier(token.getIdentifier()); - this.userPassword = encodePassword(token.getPassword()); - } - - @Override - public void handle(Callback[] callbacks) - throws UnsupportedCallbackException { - NameCallback nc = null; - PasswordCallback pc = null; - RealmCallback rc = null; - for (Callback callback : callbacks) { 
- if (callback instanceof RealmChoiceCallback) { - continue; - } else if (callback instanceof NameCallback) { - nc = (NameCallback) callback; - } else if (callback instanceof PasswordCallback) { - pc = (PasswordCallback) callback; - } else if (callback instanceof RealmCallback) { - rc = (RealmCallback) callback; - } else { - throw new UnsupportedCallbackException(callback, - "Unrecognized SASL client callback"); - } - } - if (nc != null) { - if (LOG.isDebugEnabled()) { - LOG.debug("SASL client callback: setting username: " + userName); - } - nc.setName(userName); - } - if (pc != null) { - if (LOG.isDebugEnabled()) { - LOG.debug("SASL client callback: setting userPassword"); - } - pc.setPassword(userPassword); - } - if (rc != null) { - if (LOG.isDebugEnabled()) { - LOG.debug("SASL client callback: setting realm: " - + rc.getDefaultText()); - } - rc.setText(rc.getDefaultText()); - } - } - - static String encodeIdentifier(byte[] identifier) { - return new String(Base64.encodeBase64(identifier)); - } - - static char[] encodePassword(byte[] password) { - return new String(Base64.encodeBase64(password)).toCharArray(); - } - } - } - - public static class Server extends HadoopThriftAuthBridge.Server { - final UserGroupInformation realUgi; - DelegationTokenSecretManager secretManager; - private final static long DELEGATION_TOKEN_GC_INTERVAL = 3600000; // 1 hour - //Delegation token related keys - public static final String DELEGATION_KEY_UPDATE_INTERVAL_KEY = - "hive.cluster.delegation.key.update-interval"; - public static final long DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT = - 24*60*60*1000; // 1 day - public static final String DELEGATION_TOKEN_RENEW_INTERVAL_KEY = - "hive.cluster.delegation.token.renew-interval"; - public static final long DELEGATION_TOKEN_RENEW_INTERVAL_DEFAULT = - 24*60*60*1000; // 1 day - public static final String DELEGATION_TOKEN_MAX_LIFETIME_KEY = - "hive.cluster.delegation.token.max-lifetime"; - public static final long DELEGATION_TOKEN_MAX_LIFETIME_DEFAULT = - 7*24*60*60*1000; // 7 days - public static final String DELEGATION_TOKEN_STORE_CLS = - "hive.cluster.delegation.token.store.class"; - public static final String DELEGATION_TOKEN_STORE_ZK_CONNECT_STR = - "hive.cluster.delegation.token.store.zookeeper.connectString"; - // alternate connect string specification configuration - public static final String DELEGATION_TOKEN_STORE_ZK_CONNECT_STR_ALTERNATE = - "hive.zookeeper.quorum"; - - public static final String DELEGATION_TOKEN_STORE_ZK_CONNECT_TIMEOUTMILLIS = - "hive.cluster.delegation.token.store.zookeeper.connectTimeoutMillis"; - public static final String DELEGATION_TOKEN_STORE_ZK_ZNODE = - "hive.cluster.delegation.token.store.zookeeper.znode"; - public static final String DELEGATION_TOKEN_STORE_ZK_ACL = - "hive.cluster.delegation.token.store.zookeeper.acl"; - public static final String DELEGATION_TOKEN_STORE_ZK_ZNODE_DEFAULT = - "/hivedelegation"; - - public Server() throws TTransportException { - try { - realUgi = UserGroupInformation.getCurrentUser(); - } catch (IOException ioe) { - throw new TTransportException(ioe); - } - } - /** - * Create a server with a kerberos keytab/principal. 
- */ - protected Server(String keytabFile, String principalConf) - throws TTransportException { - if (keytabFile == null || keytabFile.isEmpty()) { - throw new TTransportException("No keytab specified"); - } - if (principalConf == null || principalConf.isEmpty()) { - throw new TTransportException("No principal specified"); - } - - // Login from the keytab - String kerberosName; - try { - kerberosName = - SecurityUtil.getServerPrincipal(principalConf, "0.0.0.0"); - UserGroupInformation.loginUserFromKeytab( - kerberosName, keytabFile); - realUgi = UserGroupInformation.getLoginUser(); - assert realUgi.isFromKeytab(); - } catch (IOException ioe) { - throw new TTransportException(ioe); - } - } - - /** - * Create a TTransportFactory that, upon connection of a client socket, - * negotiates a Kerberized SASL transport. The resulting TTransportFactory - * can be passed as both the input and output transport factory when - * instantiating a TThreadPoolServer, for example. - * - * @param saslProps Map of SASL properties - */ - @Override - public TTransportFactory createTransportFactory(Map saslProps) - throws TTransportException { - // Parse out the kerberos principal, host, realm. - String kerberosName = realUgi.getUserName(); - final String names[] = SaslRpcServer.splitKerberosName(kerberosName); - if (names.length != 3) { - throw new TTransportException("Kerberos principal should have 3 parts: " + kerberosName); - } - - TSaslServerTransport.Factory transFactory = new TSaslServerTransport.Factory(); - transFactory.addServerDefinition( - AuthMethod.KERBEROS.getMechanismName(), - names[0], names[1], // two parts of kerberos principal - saslProps, - new SaslRpcServer.SaslGssCallbackHandler()); - transFactory.addServerDefinition(AuthMethod.DIGEST.getMechanismName(), - null, SaslRpcServer.SASL_DEFAULT_REALM, - saslProps, new SaslDigestCallbackHandler(secretManager)); - - return new TUGIAssumingTransportFactory(transFactory, realUgi); - } - - /** - * Wrap a TProcessor in such a way that, before processing any RPC, it - * assumes the UserGroupInformation of the user authenticated by - * the SASL transport. 
- */ - @Override - public TProcessor wrapProcessor(TProcessor processor) { - return new TUGIAssumingProcessor(processor, secretManager, true); - } - - /** - * Wrap a TProcessor to capture the client information like connecting userid, ip etc - */ - @Override - public TProcessor wrapNonAssumingProcessor(TProcessor processor) { - return new TUGIAssumingProcessor(processor, secretManager, false); - } - - protected DelegationTokenStore getTokenStore(Configuration conf) - throws IOException { - String tokenStoreClassName = conf.get(DELEGATION_TOKEN_STORE_CLS, ""); - if (StringUtils.isBlank(tokenStoreClassName)) { - return new MemoryTokenStore(); - } - try { - Class storeClass = Class - .forName(tokenStoreClassName).asSubclass( - DelegationTokenStore.class); - return ReflectionUtils.newInstance(storeClass, conf); - } catch (ClassNotFoundException e) { - throw new IOException("Error initializing delegation token store: " + tokenStoreClassName, - e); - } - } - - @Override - public void startDelegationTokenSecretManager(Configuration conf, Object rawStore, ServerMode smode) - throws IOException{ - long secretKeyInterval = - conf.getLong(DELEGATION_KEY_UPDATE_INTERVAL_KEY, - DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT); - long tokenMaxLifetime = - conf.getLong(DELEGATION_TOKEN_MAX_LIFETIME_KEY, - DELEGATION_TOKEN_MAX_LIFETIME_DEFAULT); - long tokenRenewInterval = - conf.getLong(DELEGATION_TOKEN_RENEW_INTERVAL_KEY, - DELEGATION_TOKEN_RENEW_INTERVAL_DEFAULT); - - DelegationTokenStore dts = getTokenStore(conf); - dts.init(rawStore, smode); - secretManager = new TokenStoreDelegationTokenSecretManager(secretKeyInterval, - tokenMaxLifetime, - tokenRenewInterval, - DELEGATION_TOKEN_GC_INTERVAL, dts); - secretManager.startThreads(); - } - - @Override - public String getDelegationToken(final String owner, final String renewer) - throws IOException, InterruptedException { - if (!authenticationMethod.get().equals(AuthenticationMethod.KERBEROS)) { - throw new AuthorizationException( - "Delegation Token can be issued only with kerberos authentication. " + - "Current AuthenticationMethod: " + authenticationMethod.get() - ); - } - //if the user asking the token is same as the 'owner' then don't do - //any proxy authorization checks. For cases like oozie, where it gets - //a delegation token for another user, we need to make sure oozie is - //authorized to get a delegation token. - //Do all checks on short names - UserGroupInformation currUser = UserGroupInformation.getCurrentUser(); - UserGroupInformation ownerUgi = UserGroupInformation.createRemoteUser(owner); - if (!ownerUgi.getShortUserName().equals(currUser.getShortUserName())) { - //in the case of proxy users, the getCurrentUser will return the - //real user (for e.g. 
oozie) due to the doAs that happened just before the
-      //server started executing the method getDelegationToken in the MetaStore
-      ownerUgi = UserGroupInformation.createProxyUser(owner,
-          UserGroupInformation.getCurrentUser());
-      InetAddress remoteAddr = getRemoteAddress();
-      ProxyUsers.authorize(ownerUgi, remoteAddr.getHostAddress(), null);
-    }
-    return ownerUgi.doAs(new PrivilegedExceptionAction<String>() {
-      @Override
-      public String run() throws IOException {
-        return secretManager.getDelegationToken(renewer);
-      }
-    });
-  }
-
-  @Override
-  public String getDelegationTokenWithService(String owner, String renewer, String service)
-      throws IOException, InterruptedException {
-    String token = getDelegationToken(owner, renewer);
-    return ShimLoader.getHadoopShims().addServiceToToken(token, service);
-  }
-
-  @Override
-  public long renewDelegationToken(String tokenStrForm) throws IOException {
-    if (!authenticationMethod.get().equals(AuthenticationMethod.KERBEROS)) {
-      throw new AuthorizationException(
-          "Delegation Token can be issued only with kerberos authentication. " +
-          "Current AuthenticationMethod: " + authenticationMethod.get()
-          );
-    }
-    return secretManager.renewDelegationToken(tokenStrForm);
-  }
-
-  @Override
-  public String getUserFromToken(String tokenStr) throws IOException {
-    return secretManager.getUserFromToken(tokenStr);
-  }
-
-  @Override
-  public void cancelDelegationToken(String tokenStrForm) throws IOException {
-    secretManager.cancelDelegationToken(tokenStrForm);
-  }
-
-  final static ThreadLocal<InetAddress> remoteAddress =
-      new ThreadLocal<InetAddress>() {
-        @Override
-        protected synchronized InetAddress initialValue() {
-          return null;
-        }
-      };
-
-  @Override
-  public InetAddress getRemoteAddress() {
-    return remoteAddress.get();
-  }
-
-  final static ThreadLocal<AuthenticationMethod> authenticationMethod =
-      new ThreadLocal<AuthenticationMethod>() {
-        @Override
-        protected synchronized AuthenticationMethod initialValue() {
-          return AuthenticationMethod.TOKEN;
-        }
-      };
-
-  private static ThreadLocal<String> remoteUser = new ThreadLocal<String>() {
-    @Override
-    protected synchronized String initialValue() {
-      return null;
-    }
-  };
-
-  @Override
-  public String getRemoteUser() {
-    return remoteUser.get();
-  }
-
-  /** CallbackHandler for SASL DIGEST-MD5 mechanism */
-  // This code is pretty much completely based on Hadoop's
-  // SaslRpcServer.SaslDigestCallbackHandler - the only reason we could not
-  // use that Hadoop class as-is was because it needs a Server.Connection object
-  // which is relevant in hadoop rpc but not here in the metastore - so the
-  // code below does not deal with the Server.Connection object.
- static class SaslDigestCallbackHandler implements CallbackHandler { - private final DelegationTokenSecretManager secretManager; - - public SaslDigestCallbackHandler( - DelegationTokenSecretManager secretManager) { - this.secretManager = secretManager; - } - - private char[] getPassword(DelegationTokenIdentifier tokenid) throws InvalidToken { - return encodePassword(secretManager.retrievePassword(tokenid)); - } - - private char[] encodePassword(byte[] password) { - return new String(Base64.encodeBase64(password)).toCharArray(); - } - /** {@inheritDoc} */ - @Override - public void handle(Callback[] callbacks) throws InvalidToken, - UnsupportedCallbackException { - NameCallback nc = null; - PasswordCallback pc = null; - AuthorizeCallback ac = null; - for (Callback callback : callbacks) { - if (callback instanceof AuthorizeCallback) { - ac = (AuthorizeCallback) callback; - } else if (callback instanceof NameCallback) { - nc = (NameCallback) callback; - } else if (callback instanceof PasswordCallback) { - pc = (PasswordCallback) callback; - } else if (callback instanceof RealmCallback) { - continue; // realm is ignored - } else { - throw new UnsupportedCallbackException(callback, - "Unrecognized SASL DIGEST-MD5 Callback"); - } - } - if (pc != null) { - DelegationTokenIdentifier tokenIdentifier = SaslRpcServer. - getIdentifier(nc.getDefaultName(), secretManager); - char[] password = getPassword(tokenIdentifier); - - if (LOG.isDebugEnabled()) { - LOG.debug("SASL server DIGEST-MD5 callback: setting password " - + "for client: " + tokenIdentifier.getUser()); - } - pc.setPassword(password); - } - if (ac != null) { - String authid = ac.getAuthenticationID(); - String authzid = ac.getAuthorizationID(); - if (authid.equals(authzid)) { - ac.setAuthorized(true); - } else { - ac.setAuthorized(false); - } - if (ac.isAuthorized()) { - if (LOG.isDebugEnabled()) { - String username = - SaslRpcServer.getIdentifier(authzid, secretManager).getUser().getUserName(); - LOG.debug("SASL server DIGEST-MD5 callback: setting " - + "canonicalized client ID: " + username); - } - ac.setAuthorizedID(authzid); - } - } - } - } - - /** - * Processor that pulls the SaslServer object out of the transport, and - * assumes the remote user's UGI before calling through to the original - * processor. - * - * This is used on the server side to set the UGI for each specific call. 
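The doAs mechanics behind that wrapper, distilled into a hedged standalone sketch (the end-user name is supplied by the caller; the cleanup mirrors the closeAllForUGI call in the implementation below):

import java.security.PrivilegedExceptionAction;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.security.UserGroupInformation;

// Sketch: run work as a proxied end user, then release that user's cached
// filesystem handles - the same pattern the processor applies per RPC.
final class DoAsSketch {
  static void runAs(String endUser) throws Exception {
    UserGroupInformation clientUgi =
        UserGroupInformation.createProxyUser(endUser, UserGroupInformation.getLoginUser());
    try {
      clientUgi.doAs(new PrivilegedExceptionAction<Void>() {
        @Override
        public Void run() {
          // work executes with clientUgi's identity here
          return null;
        }
      });
    } finally {
      FileSystem.closeAllForUGI(clientUgi);
    }
  }
}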
- */ - protected class TUGIAssumingProcessor implements TProcessor { - final TProcessor wrapped; - DelegationTokenSecretManager secretManager; - boolean useProxy; - TUGIAssumingProcessor(TProcessor wrapped, DelegationTokenSecretManager secretManager, - boolean useProxy) { - this.wrapped = wrapped; - this.secretManager = secretManager; - this.useProxy = useProxy; - } - - @Override - public boolean process(final TProtocol inProt, final TProtocol outProt) throws TException { - TTransport trans = inProt.getTransport(); - if (!(trans instanceof TSaslServerTransport)) { - throw new TException("Unexpected non-SASL transport " + trans.getClass()); - } - TSaslServerTransport saslTrans = (TSaslServerTransport)trans; - SaslServer saslServer = saslTrans.getSaslServer(); - String authId = saslServer.getAuthorizationID(); - authenticationMethod.set(AuthenticationMethod.KERBEROS); - LOG.debug("AUTH ID ======>" + authId); - String endUser = authId; - - if(saslServer.getMechanismName().equals("DIGEST-MD5")) { - try { - TokenIdentifier tokenId = SaslRpcServer.getIdentifier(authId, - secretManager); - endUser = tokenId.getUser().getUserName(); - authenticationMethod.set(AuthenticationMethod.TOKEN); - } catch (InvalidToken e) { - throw new TException(e.getMessage()); - } - } - Socket socket = ((TSocket)(saslTrans.getUnderlyingTransport())).getSocket(); - remoteAddress.set(socket.getInetAddress()); - UserGroupInformation clientUgi = null; - try { - if (useProxy) { - clientUgi = UserGroupInformation.createProxyUser( - endUser, UserGroupInformation.getLoginUser()); - remoteUser.set(clientUgi.getShortUserName()); - LOG.debug("Set remoteUser :" + remoteUser.get()); - return clientUgi.doAs(new PrivilegedExceptionAction() { - @Override - public Boolean run() { - try { - return wrapped.process(inProt, outProt); - } catch (TException te) { - throw new RuntimeException(te); - } - } - }); - } else { - // use the short user name for the request - UserGroupInformation endUserUgi = UserGroupInformation.createRemoteUser(endUser); - remoteUser.set(endUserUgi.getShortUserName()); - LOG.debug("Set remoteUser :" + remoteUser.get() + ", from endUser :" + endUser); - return wrapped.process(inProt, outProt); - } - } catch (RuntimeException rte) { - if (rte.getCause() instanceof TException) { - throw (TException)rte.getCause(); - } - throw rte; - } catch (InterruptedException ie) { - throw new RuntimeException(ie); // unexpected! - } catch (IOException ioe) { - throw new RuntimeException(ioe); // unexpected! - } - finally { - if (clientUgi != null) { - try { FileSystem.closeAllForUGI(clientUgi); } - catch(IOException exception) { - LOG.error("Could not clean up file-system handles for UGI: " + clientUgi, exception); - } - } - } - } - } - - /** - * A TransportFactory that wraps another one, but assumes a specified UGI - * before calling through. - * - * This is used on the server side to assume the server's Principal when accepting - * clients. 
- */ - static class TUGIAssumingTransportFactory extends TTransportFactory { - private final UserGroupInformation ugi; - private final TTransportFactory wrapped; - - public TUGIAssumingTransportFactory(TTransportFactory wrapped, UserGroupInformation ugi) { - assert wrapped != null; - assert ugi != null; - - this.wrapped = wrapped; - this.ugi = ugi; - } - - @Override - public TTransport getTransport(final TTransport trans) { - return ugi.doAs(new PrivilegedAction() { - @Override - public TTransport run() { - return wrapped.getTransport(trans); - } - }); - } - } - } -} diff --git a/shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/MemoryTokenStore.java b/shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/MemoryTokenStore.java deleted file mode 100644 index cf60b7c..0000000 --- a/shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/MemoryTokenStore.java +++ /dev/null @@ -1,115 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.thrift; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.atomic.AtomicInteger; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge.Server.ServerMode; -import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager.DelegationTokenInformation; - -/** - * Default in-memory token store implementation. 
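A hedged round-trip sketch of the store contract (assumes the same package as the deleted classes; DelegationTokenInformation(renewDate, password) is the public Hadoop constructor, used here on that assumption):

import org.apache.hadoop.io.Text;
import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager.DelegationTokenInformation;

// Sketch: exercises the DelegationTokenStore contract end to end. The names,
// renew date, and password bytes are toy values.
final class TokenStoreSketch {
  static void demo() throws Exception {
    DelegationTokenStore store = new MemoryTokenStore();
    int keySeq = store.addMasterKey("master-key-material");
    store.updateMasterKey(keySeq, "rolled-key-material");

    DelegationTokenIdentifier id =
        new DelegationTokenIdentifier(new Text("alice"), new Text("oozie"), null);
    DelegationTokenInformation info = new DelegationTokenInformation(
        System.currentTimeMillis() + 3600000L, "password".getBytes("UTF-8"));
    store.addToken(id, info);   // true: identifier was new
    store.getToken(id);         // non-null while the token is present
    store.removeToken(id);      // true: this caller removed it
    store.removeMasterKey(keySeq);
    store.close();
  }
}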
- */ -public class MemoryTokenStore implements DelegationTokenStore { - - private final Map masterKeys - = new ConcurrentHashMap(); - - private final ConcurrentHashMap tokens - = new ConcurrentHashMap(); - - private final AtomicInteger masterKeySeq = new AtomicInteger(); - private Configuration conf; - - @Override - public void setConf(Configuration conf) { - this.conf = conf; - } - - @Override - public Configuration getConf() { - return this.conf; - } - - @Override - public int addMasterKey(String s) { - int keySeq = masterKeySeq.getAndIncrement(); - masterKeys.put(keySeq, s); - return keySeq; - } - - @Override - public void updateMasterKey(int keySeq, String s) { - masterKeys.put(keySeq, s); - } - - @Override - public boolean removeMasterKey(int keySeq) { - return masterKeys.remove(keySeq) != null; - } - - @Override - public String[] getMasterKeys() { - return masterKeys.values().toArray(new String[0]); - } - - @Override - public boolean addToken(DelegationTokenIdentifier tokenIdentifier, - DelegationTokenInformation token) { - DelegationTokenInformation tokenInfo = tokens.putIfAbsent(tokenIdentifier, token); - return (tokenInfo == null); - } - - @Override - public boolean removeToken(DelegationTokenIdentifier tokenIdentifier) { - DelegationTokenInformation tokenInfo = tokens.remove(tokenIdentifier); - return tokenInfo != null; - } - - @Override - public DelegationTokenInformation getToken(DelegationTokenIdentifier tokenIdentifier) { - return tokens.get(tokenIdentifier); - } - - @Override - public List getAllDelegationTokenIdentifiers() { - List result = new ArrayList( - tokens.size()); - for (DelegationTokenIdentifier id : tokens.keySet()) { - result.add(id); - } - return result; - } - - @Override - public void close() throws IOException { - //no-op - } - - @Override - public void init(Object hmsHandler, ServerMode smode) throws TokenStoreException { - // no-op - } -} diff --git a/shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/TokenStoreDelegationTokenSecretManager.java b/shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/TokenStoreDelegationTokenSecretManager.java deleted file mode 100644 index 8146d51..0000000 --- a/shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/TokenStoreDelegationTokenSecretManager.java +++ /dev/null @@ -1,338 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.hive.thrift; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.DataInputStream; -import java.io.DataOutputStream; -import java.io.IOException; -import java.lang.reflect.Method; -import java.util.Arrays; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; - -import org.apache.commons.codec.binary.Base64; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.security.token.Token; -import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager; -import org.apache.hadoop.security.token.delegation.DelegationKey; -import org.apache.hadoop.security.token.delegation.HiveDelegationTokenSupport; -import org.apache.hadoop.util.Daemon; -import org.apache.hadoop.util.StringUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Extension of {@link DelegationTokenSecretManager} to support alternative to default in-memory - * token management for fail-over and clustering through plug-able token store (ZooKeeper etc.). - * Delegation tokens will be retrieved from the store on-demand and (unlike base class behavior) not - * cached in memory. This avoids complexities related to token expiration. The security token is - * needed only at the time the transport is opened (as opposed to per interface operation). The - * assumption therefore is low cost of interprocess token retrieval (for random read efficient store - * such as ZooKeeper) compared to overhead of synchronizing per-process in-memory token caches. - * The wrapper incorporates the token store abstraction within the limitations of current - * Hive/Hadoop dependency (.20S) with minimum code duplication. - * Eventually this should be supported by Hadoop security directly. 
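A hedged usage sketch tying the pieces together; the interval values mirror the defaults defined elsewhere in this patch, and the call sequence is illustrative rather than a passing test:

// Sketch: stand the store-backed secret manager up over a pluggable store.
final class SecretManagerSketch {
  static void demo() throws java.io.IOException {
    DelegationTokenStore store = new MemoryTokenStore(); // or ZooKeeperTokenStore
    TokenStoreDelegationTokenSecretManager mgr =
        new TokenStoreDelegationTokenSecretManager(
            24L * 60 * 60 * 1000,     // delegationKeyUpdateInterval: 1 day
            7L * 24 * 60 * 60 * 1000, // delegationTokenMaxLifetime: 7 days
            24L * 60 * 60 * 1000,     // delegationTokenRenewInterval: 1 day
            3600000L,                 // delegationTokenRemoverScanInterval: 1 hour
            store);
    mgr.startThreads();
    String tokenStr = mgr.getDelegationToken("renewer-principal");
    mgr.renewDelegationToken(tokenStr); // renewal is checked against the renewer
    mgr.cancelDelegationToken(tokenStr);
    mgr.stopThreads();
  }
}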
- */ -public class TokenStoreDelegationTokenSecretManager extends DelegationTokenSecretManager { - - private static final Logger LOGGER = - LoggerFactory.getLogger(TokenStoreDelegationTokenSecretManager.class.getName()); - - final private long keyUpdateInterval; - final private long tokenRemoverScanInterval; - private Thread tokenRemoverThread; - - final private DelegationTokenStore tokenStore; - - public TokenStoreDelegationTokenSecretManager(long delegationKeyUpdateInterval, - long delegationTokenMaxLifetime, long delegationTokenRenewInterval, - long delegationTokenRemoverScanInterval, - DelegationTokenStore sharedStore) { - super(delegationKeyUpdateInterval, delegationTokenMaxLifetime, delegationTokenRenewInterval, - delegationTokenRemoverScanInterval); - this.keyUpdateInterval = delegationKeyUpdateInterval; - this.tokenRemoverScanInterval = delegationTokenRemoverScanInterval; - - this.tokenStore = sharedStore; - } - - protected DelegationTokenIdentifier getTokenIdentifier(Token token) - throws IOException { - // turn bytes back into identifier for cache lookup - ByteArrayInputStream buf = new ByteArrayInputStream(token.getIdentifier()); - DataInputStream in = new DataInputStream(buf); - DelegationTokenIdentifier id = createIdentifier(); - id.readFields(in); - return id; - } - - protected Map reloadKeys() { - // read keys from token store - String[] allKeys = tokenStore.getMasterKeys(); - Map keys - = new HashMap(allKeys.length); - for (String keyStr : allKeys) { - DelegationKey key = new DelegationKey(); - try { - decodeWritable(key, keyStr); - keys.put(key.getKeyId(), key); - } catch (IOException ex) { - LOGGER.error("Failed to load master key.", ex); - } - } - synchronized (this) { - super.allKeys.clear(); - super.allKeys.putAll(keys); - } - return keys; - } - - @Override - public byte[] retrievePassword(DelegationTokenIdentifier identifier) throws InvalidToken { - DelegationTokenInformation info = this.tokenStore.getToken(identifier); - if (info == null) { - throw new InvalidToken("token expired or does not exist: " + identifier); - } - // must reuse super as info.getPassword is not accessible - synchronized (this) { - try { - super.currentTokens.put(identifier, info); - return super.retrievePassword(identifier); - } finally { - super.currentTokens.remove(identifier); - } - } - } - - @Override - public DelegationTokenIdentifier cancelToken(Token token, - String canceller) throws IOException { - DelegationTokenIdentifier id = getTokenIdentifier(token); - LOGGER.info("Token cancelation requested for identifier: "+id); - this.tokenStore.removeToken(id); - return id; - } - - /** - * Create the password and add it to shared store. 
- */ - @Override - protected byte[] createPassword(DelegationTokenIdentifier id) { - byte[] password; - DelegationTokenInformation info; - synchronized (this) { - password = super.createPassword(id); - // add new token to shared store - // need to persist expiration along with password - info = super.currentTokens.remove(id); - if (info == null) { - throw new IllegalStateException("Failed to retrieve token after creation"); - } - } - this.tokenStore.addToken(id, info); - return password; - } - - @Override - public long renewToken(Token token, - String renewer) throws InvalidToken, IOException { - // since renewal is KERBEROS authenticated token may not be cached - final DelegationTokenIdentifier id = getTokenIdentifier(token); - DelegationTokenInformation tokenInfo = this.tokenStore.getToken(id); - if (tokenInfo == null) { - throw new InvalidToken("token does not exist: " + id); // no token found - } - // ensure associated master key is available - if (!super.allKeys.containsKey(id.getMasterKeyId())) { - LOGGER.info("Unknown master key (id={}), (re)loading keys from token store.", - id.getMasterKeyId()); - reloadKeys(); - } - // reuse super renewal logic - synchronized (this) { - super.currentTokens.put(id, tokenInfo); - try { - return super.renewToken(token, renewer); - } finally { - super.currentTokens.remove(id); - } - } - } - - public static String encodeWritable(Writable key) throws IOException { - ByteArrayOutputStream bos = new ByteArrayOutputStream(); - DataOutputStream dos = new DataOutputStream(bos); - key.write(dos); - dos.flush(); - return Base64.encodeBase64URLSafeString(bos.toByteArray()); - } - - public static void decodeWritable(Writable w, String idStr) throws IOException { - DataInputStream in = new DataInputStream(new ByteArrayInputStream(Base64.decodeBase64(idStr))); - w.readFields(in); - } - - /** - * Synchronize master key updates / sequence generation for multiple nodes. - * NOTE: {@Link AbstractDelegationTokenSecretManager} keeps currentKey private, so we need - * to utilize this "hook" to manipulate the key through the object reference. - * This .20S workaround should cease to exist when Hadoop supports token store. 
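To make that key handshake concrete, a round-trip sketch using the two static helpers just defined (the key id, expiry, and key material are placeholders):

import java.io.IOException;
import org.apache.hadoop.security.token.delegation.DelegationKey;

// Sketch: a DelegationKey travels to the store as a URL-safe Base64 string and
// decodes bit-identically, which is what lets logUpdateMasterKey() push the
// store-assigned sequence number back into the in-memory key.
final class KeyCodecSketch {
  static void demo() throws IOException {
    DelegationKey key = new DelegationKey(1, System.currentTimeMillis() + 86400000L,
        new byte[] { 0x01, 0x02 });
    String encoded = TokenStoreDelegationTokenSecretManager.encodeWritable(key);
    DelegationKey decoded = new DelegationKey();
    TokenStoreDelegationTokenSecretManager.decodeWritable(decoded, encoded);
    assert decoded.getKeyId() == key.getKeyId();
  }
}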
- */
-  @Override
-  protected void logUpdateMasterKey(DelegationKey key) throws IOException {
-    int keySeq = this.tokenStore.addMasterKey(encodeWritable(key));
-    // update key with assigned identifier
-    DelegationKey keyWithSeq = new DelegationKey(keySeq, key.getExpiryDate(), key.getKey());
-    String keyStr = encodeWritable(keyWithSeq);
-    this.tokenStore.updateMasterKey(keySeq, keyStr);
-    decodeWritable(key, keyStr);
-    LOGGER.info("New master key with key id={}", key.getKeyId());
-    super.logUpdateMasterKey(key);
-  }
-
-  @Override
-  public synchronized void startThreads() throws IOException {
-    try {
-      // updateCurrentKey needs to be called to initialize the master key
-      // (there should be a null check added in the future in rollMasterKey)
-      // updateCurrentKey();
-      Method m = AbstractDelegationTokenSecretManager.class.getDeclaredMethod("updateCurrentKey");
-      m.setAccessible(true);
-      m.invoke(this);
-    } catch (Exception e) {
-      throw new IOException("Failed to initialize master key", e);
-    }
-    running = true;
-    tokenRemoverThread = new Daemon(new ExpiredTokenRemover());
-    tokenRemoverThread.start();
-  }
-
-  @Override
-  public synchronized void stopThreads() {
-    if (LOGGER.isDebugEnabled()) {
-      LOGGER.debug("Stopping expired delegation token remover thread");
-    }
-    running = false;
-    if (tokenRemoverThread != null) {
-      tokenRemoverThread.interrupt();
-    }
-  }
-
-  /**
-   * Remove expired tokens. Replaces logic in {@link AbstractDelegationTokenSecretManager}
-   * that cannot be reused due to private method access. Logic here can more efficiently
-   * deal with external token store by only loading into memory the minimum data needed.
-   */
-  protected void removeExpiredTokens() {
-    long now = System.currentTimeMillis();
-    Iterator<DelegationTokenIdentifier> i = tokenStore.getAllDelegationTokenIdentifiers()
-        .iterator();
-    while (i.hasNext()) {
-      DelegationTokenIdentifier id = i.next();
-      if (now > id.getMaxDate()) {
-        this.tokenStore.removeToken(id); // no need to look at token info
-      } else {
-        // get token info to check renew date
-        DelegationTokenInformation tokenInfo = tokenStore.getToken(id);
-        if (tokenInfo != null) {
-          if (now > tokenInfo.getRenewDate()) {
-            this.tokenStore.removeToken(id);
-          }
-        }
-      }
-    }
-  }
-
-  /**
-   * Extension of rollMasterKey to remove expired keys from store.
-   *
-   * @throws IOException
-   */
-  protected void rollMasterKeyExt() throws IOException {
-    Map<Integer, DelegationKey> keys = reloadKeys();
-    int currentKeyId = super.currentId;
-    HiveDelegationTokenSupport.rollMasterKey(TokenStoreDelegationTokenSecretManager.this);
-    List<DelegationKey> keysAfterRoll = Arrays.asList(getAllKeys());
-    for (DelegationKey key : keysAfterRoll) {
-      keys.remove(key.getKeyId());
-      if (key.getKeyId() == currentKeyId) {
-        tokenStore.updateMasterKey(currentKeyId, encodeWritable(key));
-      }
-    }
-    for (DelegationKey expiredKey : keys.values()) {
-      LOGGER.info("Removing expired key id={}", expiredKey.getKeyId());
-      try {
-        tokenStore.removeMasterKey(expiredKey.getKeyId());
-      } catch (Exception e) {
-        LOGGER.error("Error removing expired key id={}", expiredKey.getKeyId(), e);
-      }
-    }
-  }
-
-  /**
-   * Cloned from {@link AbstractDelegationTokenSecretManager} to deal with private access
-   * restriction (there would not be a need to clone the remove thread if the remove logic was
-   * protected/extensible).
- */ - protected class ExpiredTokenRemover extends Thread { - private long lastMasterKeyUpdate; - private long lastTokenCacheCleanup; - - @Override - public void run() { - LOGGER.info("Starting expired delegation token remover thread, " - + "tokenRemoverScanInterval=" + tokenRemoverScanInterval - / (60 * 1000) + " min(s)"); - try { - while (running) { - long now = System.currentTimeMillis(); - if (lastMasterKeyUpdate + keyUpdateInterval < now) { - try { - rollMasterKeyExt(); - lastMasterKeyUpdate = now; - } catch (IOException e) { - LOGGER.error("Master key updating failed. " - + StringUtils.stringifyException(e)); - } - } - if (lastTokenCacheCleanup + tokenRemoverScanInterval < now) { - removeExpiredTokens(); - lastTokenCacheCleanup = now; - } - try { - Thread.sleep(5000); // 5 seconds - } catch (InterruptedException ie) { - LOGGER - .error("InterruptedException received for ExpiredTokenRemover thread " - + ie); - } - } - } catch (Throwable t) { - LOGGER.error("ExpiredTokenRemover thread received unexpected exception. " - + t, t); - Runtime.getRuntime().exit(-1); - } - } - } - -} diff --git a/shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/ZooKeeperTokenStore.java b/shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/ZooKeeperTokenStore.java deleted file mode 100644 index 91e4a69..0000000 --- a/shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/ZooKeeperTokenStore.java +++ /dev/null @@ -1,471 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License.
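The ExpiredTokenRemover above is a standard interruptible polling loop; its skeleton, for illustration only (interval values invented):

public class PollingRemoverSketch implements Runnable {
  private volatile boolean running = true;
  private final long scanIntervalMs = 60L * 60 * 1000;   // stand-in for tokenRemoverScanInterval
  private long lastCleanup;

  @Override
  public void run() {
    try {
      while (running) {
        long now = System.currentTimeMillis();
        if (lastCleanup + scanIntervalMs < now) {
          // removeExpiredTokens() would run here
          lastCleanup = now;
        }
        Thread.sleep(5000);   // same 5-second cadence as the remover above
      }
    } catch (InterruptedException ie) {
      // the stop side interrupts this thread; exit quietly
    }
  }

  public void stop() { running = false; }
}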
- */ - -package org.apache.hadoop.hive.thrift; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import org.apache.commons.lang.StringUtils; -import org.apache.curator.framework.CuratorFramework; -import org.apache.curator.framework.CuratorFrameworkFactory; -import org.apache.curator.framework.api.ACLProvider; -import org.apache.curator.framework.imps.CuratorFrameworkState; -import org.apache.curator.retry.ExponentialBackoffRetry; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.shims.ShimLoader; -import org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge.Server.ServerMode; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager.DelegationTokenInformation; -import org.apache.hadoop.security.token.delegation.HiveDelegationTokenSupport; -import org.apache.zookeeper.CreateMode; -import org.apache.zookeeper.KeeperException; -import org.apache.zookeeper.ZooDefs; -import org.apache.zookeeper.ZooDefs.Ids; -import org.apache.zookeeper.ZooDefs.Perms; -import org.apache.zookeeper.data.ACL; -import org.apache.zookeeper.data.Id; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * ZooKeeper token store implementation. - */ -public class ZooKeeperTokenStore implements DelegationTokenStore { - - private static final Logger LOGGER = - LoggerFactory.getLogger(ZooKeeperTokenStore.class.getName()); - - protected static final String ZK_SEQ_FORMAT = "%010d"; - private static final String NODE_KEYS = "/keys"; - private static final String NODE_TOKENS = "/tokens"; - - private String rootNode = ""; - private volatile CuratorFramework zkSession; - private String zkConnectString; - private int connectTimeoutMillis; - private List newNodeAcl = Arrays.asList(new ACL(Perms.ALL, Ids.AUTH_IDS)); - - /** - * ACLProvider permissions will be used in case parent dirs need to be created - */ - private final ACLProvider aclDefaultProvider = new ACLProvider() { - - @Override - public List getDefaultAcl() { - return newNodeAcl; - } - - @Override - public List getAclForPath(String path) { - return getDefaultAcl(); - } - }; - - - private ServerMode serverMode; - - private final String WHEN_ZK_DSTORE_MSG = "when zookeeper based delegation token storage is enabled " - + "(hive.cluster.delegation.token.store.class=" + ZooKeeperTokenStore.class.getName() + ")"; - - private Configuration conf; - - /** - * Default constructor for dynamic instantiation w/ Configurable - * (ReflectionUtils does not support Configuration constructor injection). - */ - protected ZooKeeperTokenStore() { - } - - private CuratorFramework getSession() { - if (zkSession == null || zkSession.getState() == CuratorFrameworkState.STOPPED) { - synchronized (this) { - if (zkSession == null || zkSession.getState() == CuratorFrameworkState.STOPPED) { - zkSession = - CuratorFrameworkFactory.builder().connectString(zkConnectString) - .connectionTimeoutMs(connectTimeoutMillis).aclProvider(aclDefaultProvider) - .retryPolicy(new ExponentialBackoffRetry(1000, 3)).build(); - zkSession.start(); - } - } - } - return zkSession; - } - - private void setupJAASConfig(Configuration conf) throws IOException { - if (!UserGroupInformation.getLoginUser().isFromKeytab()) { - // The process has not logged in using keytab - // this should be a test mode, can't use keytab to authenticate - // with zookeeper.
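The getSession() method above is double-checked locking around a lazily built Curator client; a standalone restatement, for illustration only (connect string and timeout are placeholders):

import org.apache.curator.framework.CuratorFramework;
import org.apache.curator.framework.CuratorFrameworkFactory;
import org.apache.curator.framework.imps.CuratorFrameworkState;
import org.apache.curator.retry.ExponentialBackoffRetry;

public class LazyZkClient {
  private volatile CuratorFramework client;   // volatile is what makes the double check safe

  public CuratorFramework get() {
    if (client == null || client.getState() == CuratorFrameworkState.STOPPED) {
      synchronized (this) {
        if (client == null || client.getState() == CuratorFrameworkState.STOPPED) {
          client = CuratorFrameworkFactory.builder()
              .connectString("zk1:2181,zk2:2181")             // placeholder
              .connectionTimeoutMs(15000)                     // placeholder
              .retryPolicy(new ExponentialBackoffRetry(1000, 3))
              .build();
          client.start();                                     // start before first use
        }
      }
    }
    return client;
  }
}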
- LOGGER.warn("Login is not from keytab"); - return; - } - - String principal; - String keytab; - switch (serverMode) { - case METASTORE: - principal = getNonEmptyConfVar(conf, "hive.metastore.kerberos.principal"); - keytab = getNonEmptyConfVar(conf, "hive.metastore.kerberos.keytab.file"); - break; - case HIVESERVER2: - principal = getNonEmptyConfVar(conf, "hive.server2.authentication.kerberos.principal"); - keytab = getNonEmptyConfVar(conf, "hive.server2.authentication.kerberos.keytab"); - break; - default: - throw new AssertionError("Unexpected server mode " + serverMode); - } - ShimLoader.getHadoopShims().setZookeeperClientKerberosJaasConfig(principal, keytab); - } - - private String getNonEmptyConfVar(Configuration conf, String param) throws IOException { - String val = conf.get(param); - if (val == null || val.trim().isEmpty()) { - throw new IOException("Configuration parameter " + param + " should be set, " - + WHEN_ZK_DSTORE_MSG); - } - return val; - } - - /** - * Create a path if it does not already exist ("mkdir -p") - * @param path string with '/' separator - * @param acl list of ACL entries - * @throws TokenStoreException - */ - public void ensurePath(String path, List acl) - throws TokenStoreException { - try { - CuratorFramework zk = getSession(); - String node = zk.create().creatingParentsIfNeeded().withMode(CreateMode.PERSISTENT) - .withACL(acl).forPath(path); - LOGGER.info("Created path: {} ", node); - } catch (KeeperException.NodeExistsException e) { - // node already exists - } catch (Exception e) { - throw new TokenStoreException("Error creating path " + path, e); - } - } - - /** - * Parse ACL permission string, from ZooKeeperMain private method - * @param permString - * @return - */ - public static int getPermFromString(String permString) { - int perm = 0; - for (int i = 0; i < permString.length(); i++) { - switch (permString.charAt(i)) { - case 'r': - perm |= ZooDefs.Perms.READ; - break; - case 'w': - perm |= ZooDefs.Perms.WRITE; - break; - case 'c': - perm |= ZooDefs.Perms.CREATE; - break; - case 'd': - perm |= ZooDefs.Perms.DELETE; - break; - case 'a': - perm |= ZooDefs.Perms.ADMIN; - break; - default: - LOGGER.error("Unknown perm type: " + permString.charAt(i)); - } - } - return perm; - } - - /** - * Parse comma separated list of ACL entries to secure generated nodes, e.g. 
- * sasl:hive/host1@MY.DOMAIN:cdrwa,sasl:hive/host2@MY.DOMAIN:cdrwa - * @param aclString - * @return ACL list - */ - public static List parseACLs(String aclString) { - String[] aclComps = StringUtils.splitByWholeSeparator(aclString, ","); - List acl = new ArrayList(aclComps.length); - for (String a : aclComps) { - if (StringUtils.isBlank(a)) { - continue; - } - a = a.trim(); - // from ZooKeeperMain private method - int firstColon = a.indexOf(':'); - int lastColon = a.lastIndexOf(':'); - if (firstColon == -1 || lastColon == -1 || firstColon == lastColon) { - LOGGER.error(a + " does not have the form scheme:id:perm"); - continue; - } - ACL newAcl = new ACL(); - newAcl.setId(new Id(a.substring(0, firstColon), a.substring( - firstColon + 1, lastColon))); - newAcl.setPerms(getPermFromString(a.substring(lastColon + 1))); - acl.add(newAcl); - } - return acl; - } - - private void initClientAndPaths() { - if (this.zkSession != null) { - this.zkSession.close(); - } - try { - ensurePath(rootNode + NODE_KEYS, newNodeAcl); - ensurePath(rootNode + NODE_TOKENS, newNodeAcl); - } catch (TokenStoreException e) { - throw e; - } - } - - @Override - public void setConf(Configuration conf) { - if (conf == null) { - throw new IllegalArgumentException("conf is null"); - } - this.conf = conf; - } - - @Override - public Configuration getConf() { - return null; // not required - } - - private Map getAllKeys() throws KeeperException, InterruptedException { - - String masterKeyNode = rootNode + NODE_KEYS; - - // get children of key node - List nodes = zkGetChildren(masterKeyNode); - - // read each child node, add to results - Map result = new HashMap(); - for (String node : nodes) { - String nodePath = masterKeyNode + "/" + node; - byte[] data = zkGetData(nodePath); - if (data != null) { - result.put(getSeq(node), data); - } - } - return result; - } - - private List zkGetChildren(String path) { - CuratorFramework zk = getSession(); - try { - return zk.getChildren().forPath(path); - } catch (Exception e) { - throw new TokenStoreException("Error getting children for " + path, e); - } - } - - private byte[] zkGetData(String nodePath) { - CuratorFramework zk = getSession(); - try { - return zk.getData().forPath(nodePath); - } catch (KeeperException.NoNodeException ex) { - return null; - } catch (Exception e) { - throw new TokenStoreException("Error reading " + nodePath, e); - } - } - - private int getSeq(String path) { - String[] pathComps = path.split("/"); - return Integer.parseInt(pathComps[pathComps.length-1]); - } - - @Override - public int addMasterKey(String s) { - String keysPath = rootNode + NODE_KEYS + "/"; - CuratorFramework zk = getSession(); - String newNode; - try { - newNode = zk.create().withMode(CreateMode.PERSISTENT_SEQUENTIAL).withACL(newNodeAcl) - .forPath(keysPath, s.getBytes()); - } catch (Exception e) { - throw new TokenStoreException("Error creating new node with path " + keysPath, e); - } - LOGGER.info("Added key {}", newNode); - return getSeq(newNode); - } - - @Override - public void updateMasterKey(int keySeq, String s) { - CuratorFramework zk = getSession(); - String keyPath = rootNode + NODE_KEYS + "/" + String.format(ZK_SEQ_FORMAT, keySeq); - try { - zk.setData().forPath(keyPath, s.getBytes()); - } catch (Exception e) { - throw new TokenStoreException("Error setting data in " + keyPath, e); - } - } - - @Override - public boolean removeMasterKey(int keySeq) { - String keyPath = rootNode + NODE_KEYS + "/" + String.format(ZK_SEQ_FORMAT, keySeq); - zkDelete(keyPath); - return true; - } - - 
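A quick usage sketch (illustration only) for the parseACLs/getPermFromString pair above; principals and hosts are examples:

import java.util.List;
import org.apache.hadoop.hive.thrift.ZooKeeperTokenStore;
import org.apache.zookeeper.data.ACL;

public class AclParseDemo {
  public static void main(String[] args) {
    List<ACL> acls = ZooKeeperTokenStore.parseACLs(
        "sasl:hive/host1@MY.DOMAIN:cdrwa,sasl:hive/host2@MY.DOMAIN:cdrwa");
    for (ACL acl : acls) {
      // getPerms() is a bit mask: c=CREATE, d=DELETE, r=READ, w=WRITE, a=ADMIN
      System.out.println(acl.getId() + " -> perms=" + acl.getPerms());
    }
  }
}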
private void zkDelete(String path) { - CuratorFramework zk = getSession(); - try { - zk.delete().forPath(path); - } catch (KeeperException.NoNodeException ex) { - // already deleted - } catch (Exception e) { - throw new TokenStoreException("Error deleting " + path, e); - } - } - - @Override - public String[] getMasterKeys() { - try { - Map allKeys = getAllKeys(); - String[] result = new String[allKeys.size()]; - int resultIdx = 0; - for (byte[] keyBytes : allKeys.values()) { - result[resultIdx++] = new String(keyBytes); - } - return result; - } catch (KeeperException ex) { - throw new TokenStoreException(ex); - } catch (InterruptedException ex) { - throw new TokenStoreException(ex); - } - } - - - private String getTokenPath(DelegationTokenIdentifier tokenIdentifier) { - try { - return rootNode + NODE_TOKENS + "/" - + TokenStoreDelegationTokenSecretManager.encodeWritable(tokenIdentifier); - } catch (IOException ex) { - throw new TokenStoreException("Failed to encode token identifier", ex); - } - } - - @Override - public boolean addToken(DelegationTokenIdentifier tokenIdentifier, - DelegationTokenInformation token) { - byte[] tokenBytes = HiveDelegationTokenSupport.encodeDelegationTokenInformation(token); - String tokenPath = getTokenPath(tokenIdentifier); - CuratorFramework zk = getSession(); - String newNode; - try { - newNode = zk.create().withMode(CreateMode.PERSISTENT).withACL(newNodeAcl) - .forPath(tokenPath, tokenBytes); - } catch (Exception e) { - throw new TokenStoreException("Error creating new node with path " + tokenPath, e); - } - - LOGGER.info("Added token: {}", newNode); - return true; - } - - @Override - public boolean removeToken(DelegationTokenIdentifier tokenIdentifier) { - String tokenPath = getTokenPath(tokenIdentifier); - zkDelete(tokenPath); - return true; - } - - @Override - public DelegationTokenInformation getToken(DelegationTokenIdentifier tokenIdentifier) { - byte[] tokenBytes = zkGetData(getTokenPath(tokenIdentifier)); - try { - return HiveDelegationTokenSupport.decodeDelegationTokenInformation(tokenBytes); - } catch (Exception ex) { - throw new TokenStoreException("Failed to decode token", ex); - } - } - - @Override - public List getAllDelegationTokenIdentifiers() { - String containerNode = rootNode + NODE_TOKENS; - final List nodes = zkGetChildren(containerNode); - List result = new java.util.ArrayList( - nodes.size()); - for (String node : nodes) { - DelegationTokenIdentifier id = new DelegationTokenIdentifier(); - try { - TokenStoreDelegationTokenSecretManager.decodeWritable(id, node); - result.add(id); - } catch (Exception e) { - LOGGER.warn("Failed to decode token '{}'", node); - } - } - return result; - } - - @Override - public void close() throws IOException { - if (this.zkSession != null) { - this.zkSession.close(); - } - } - - @Override - public void init(Object objectStore, ServerMode smode) { - this.serverMode = smode; - zkConnectString = - conf.get(HadoopThriftAuthBridge20S.Server.DELEGATION_TOKEN_STORE_ZK_CONNECT_STR, null); - if (zkConnectString == null || zkConnectString.trim().isEmpty()) { - // try alternate config param - zkConnectString = - conf.get( - HadoopThriftAuthBridge20S.Server.DELEGATION_TOKEN_STORE_ZK_CONNECT_STR_ALTERNATE, - null); - if (zkConnectString == null || zkConnectString.trim().isEmpty()) { - throw new IllegalArgumentException("Zookeeper connect string has to be specified through " - + "either " + HadoopThriftAuthBridge20S.Server.DELEGATION_TOKEN_STORE_ZK_CONNECT_STR - + " or " - +
HadoopThriftAuthBridge20S.Server.DELEGATION_TOKEN_STORE_ZK_CONNECT_STR_ALTERNATE - + WHEN_ZK_DSTORE_MSG); - } - } - connectTimeoutMillis = - conf.getInt( - HadoopThriftAuthBridge20S.Server.DELEGATION_TOKEN_STORE_ZK_CONNECT_TIMEOUTMILLIS, - CuratorFrameworkFactory.builder().getConnectionTimeoutMs()); - String aclStr = conf.get(HadoopThriftAuthBridge20S.Server.DELEGATION_TOKEN_STORE_ZK_ACL, null); - if (StringUtils.isNotBlank(aclStr)) { - this.newNodeAcl = parseACLs(aclStr); - } - rootNode = - conf.get(HadoopThriftAuthBridge20S.Server.DELEGATION_TOKEN_STORE_ZK_ZNODE, - HadoopThriftAuthBridge20S.Server.DELEGATION_TOKEN_STORE_ZK_ZNODE_DEFAULT) + serverMode; - - try { - // Install the JAAS Configuration for the runtime - setupJAASConfig(conf); - } catch (IOException e) { - throw new TokenStoreException("Error setting up JAAS configuration for zookeeper client " - + e.getMessage(), e); - } - initClientAndPaths(); - } - -} diff --git a/shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/client/TUGIAssumingTransport.java b/shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/client/TUGIAssumingTransport.java deleted file mode 100644 index fe18706..0000000 --- a/shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/client/TUGIAssumingTransport.java +++ /dev/null @@ -1,74 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.thrift.client; - -import java.io.IOException; -import java.security.PrivilegedExceptionAction; - -import org.apache.hadoop.hive.thrift.TFilterTransport; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.thrift.transport.TTransport; -import org.apache.thrift.transport.TTransportException; - -/** - * The Thrift SASL transports call Sasl.createSaslServer and Sasl.createSaslClient - * inside open(). So, we need to assume the correct UGI when the transport is opened - * so that the SASL mechanisms have access to the right principal. This transport - * wraps the Sasl transports to set up the right UGI context for open(). - * - * This is used on the client side, where the API explicitly opens a transport to - * the server. - */ - public class TUGIAssumingTransport extends TFilterTransport { - protected UserGroupInformation ugi; - - public TUGIAssumingTransport(TTransport wrapped, UserGroupInformation ugi) { - super(wrapped); - this.ugi = ugi; - } - - @Override - public void open() throws TTransportException { - try { - ugi.doAs(new PrivilegedExceptionAction() { - public Void run() { - try { - wrapped.open(); - } catch (TTransportException tte) { - // Wrap the transport exception in an RTE, since UGI.doAs() then goes - // and unwraps this for us out of the doAs block. 
We then unwrap one - // more time in our catch clause to get back the TTE. (ugh) - throw new RuntimeException(tte); - } - return null; - } - }); - } catch (IOException ioe) { - throw new RuntimeException("Received an ioe we never threw!", ioe); - } catch (InterruptedException ie) { - throw new RuntimeException("Received an ie we never threw!", ie); - } catch (RuntimeException rte) { - if (rte.getCause() instanceof TTransportException) { - throw (TTransportException)rte.getCause(); - } else { - throw rte; - } - } - } - } diff --git a/shims/common-secure/src/main/java/org/apache/hadoop/security/token/delegation/HiveDelegationTokenSupport.java b/shims/common-secure/src/main/java/org/apache/hadoop/security/token/delegation/HiveDelegationTokenSupport.java deleted file mode 100644 index 6b39a14..0000000 --- a/shims/common-secure/src/main/java/org/apache/hadoop/security/token/delegation/HiveDelegationTokenSupport.java +++ /dev/null @@ -1,68 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.security.token.delegation; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.DataInputStream; -import java.io.DataOutputStream; -import java.io.IOException; - -import org.apache.hadoop.io.WritableUtils; -import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager.DelegationTokenInformation; - -/** - * Workaround for serialization of {@link DelegationTokenInformation} through package access. - * Future version of Hadoop should add this to DelegationTokenInformation itself. 
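The open() method above tunnels a checked TTransportException through UGI.doAs() inside a RuntimeException and unwraps it afterwards; the same idiom as a standalone sketch (TransportException and rawOpen are invented stand-ins for the Thrift types):

import java.io.IOException;
import java.security.PrivilegedExceptionAction;
import org.apache.hadoop.security.UserGroupInformation;

public class DoAsTunnelDemo {
  static class TransportException extends Exception {}

  // stand-in for wrapped.open(): a call that throws a checked exception
  static void rawOpen() throws TransportException { throw new TransportException(); }

  static void openAs(UserGroupInformation ugi) throws TransportException, IOException {
    try {
      ugi.doAs((PrivilegedExceptionAction<Void>) () -> {
        try {
          rawOpen();
        } catch (TransportException te) {
          throw new RuntimeException(te);   // tunnel: doAs only passes IOE, IE and RTE through
        }
        return null;
      });
    } catch (InterruptedException ie) {
      throw new RuntimeException("Received an ie we never threw!", ie);
    } catch (RuntimeException rte) {
      if (rte.getCause() instanceof TransportException) {
        throw (TransportException) rte.getCause();   // unwrap on the caller's side
      }
      throw rte;
    }
  }
}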
- */ -public final class HiveDelegationTokenSupport { - - private HiveDelegationTokenSupport() {} - - public static byte[] encodeDelegationTokenInformation(DelegationTokenInformation token) { - try { - ByteArrayOutputStream bos = new ByteArrayOutputStream(); - DataOutputStream out = new DataOutputStream(bos); - WritableUtils.writeVInt(out, token.password.length); - out.write(token.password); - out.writeLong(token.renewDate); - out.flush(); - return bos.toByteArray(); - } catch (IOException ex) { - throw new RuntimeException("Failed to encode token.", ex); - } - } - - public static DelegationTokenInformation decodeDelegationTokenInformation(byte[] tokenBytes) - throws IOException { - DataInputStream in = new DataInputStream(new ByteArrayInputStream(tokenBytes)); - DelegationTokenInformation token = new DelegationTokenInformation(0, null); - int len = WritableUtils.readVInt(in); - token.password = new byte[len]; - in.readFully(token.password); - token.renewDate = in.readLong(); - return token; - } - - public static void rollMasterKey( - AbstractDelegationTokenSecretManager mgr) - throws IOException { - mgr.rollMasterKey(); - } - -} diff --git a/shims/common/pom.xml b/shims/common/pom.xml index f8d779d..739504d 100644 --- a/shims/common/pom.xml +++ b/shims/common/pom.xml @@ -53,13 +53,34 @@ org.apache.hadoop hadoop-core - ${hadoop-20.version} + ${hadoop-20S.version} true + commons-lang + commons-lang + ${commons-lang.version} + + org.apache.thrift libthrift ${libthrift.version} + + org.apache.curator + curator-framework + ${curator.version} + + + org.apache.zookeeper + zookeeper + ${zookeeper.version} + + + org.apache.hadoop + hadoop-core + + + diff --git a/shims/common/src/main/java/org/apache/hadoop/fs/DefaultFileAccess.java b/shims/common/src/main/java/org/apache/hadoop/fs/DefaultFileAccess.java index dbd9f38..61af229 100644 --- a/shims/common/src/main/java/org/apache/hadoop/fs/DefaultFileAccess.java +++ b/shims/common/src/main/java/org/apache/hadoop/fs/DefaultFileAccess.java @@ -35,6 +35,7 @@ import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.security.UserGroupInformation; /** @@ -51,9 +52,9 @@ public static void checkFileAccess(FileSystem fs, FileStatus stat, FsAction action) throws IOException, AccessControlException, LoginException { // Get the user/groups for checking permissions based on the current UGI. 
- UserGroupInformation currentUgi = ShimLoader.getHadoopShims().getUGIForConf(fs.getConf()); + UserGroupInformation currentUgi = Utils.getUGIForConf(fs.getConf()); DefaultFileAccess.checkFileAccess(fs, stat, action, - ShimLoader.getHadoopShims().getShortUserName(currentUgi), + currentUgi.getShortUserName(), Arrays.asList(currentUgi.getGroupNames())); } diff --git a/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java b/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java index 289df92..afc71e3 100644 --- a/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java +++ b/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java @@ -23,19 +23,13 @@ import java.net.InetSocketAddress; import java.net.MalformedURLException; import java.net.URI; -import java.net.URISyntaxException; import java.nio.ByteBuffer; import java.security.AccessControlException; -import java.security.PrivilegedExceptionAction; import java.util.Comparator; import java.util.List; import java.util.Map; import java.util.TreeMap; -import javax.security.auth.login.LoginException; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.FSDataInputStream; @@ -54,6 +48,7 @@ import org.apache.hadoop.mapred.JobStatus; import org.apache.hadoop.mapred.RecordReader; import org.apache.hadoop.mapred.Reporter; +import org.apache.hadoop.mapred.lib.CombineFileSplit; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.JobContext; import org.apache.hadoop.mapreduce.JobID; @@ -74,8 +69,6 @@ */ public interface HadoopShims { - static final Log LOG = LogFactory.getLog(HadoopShims.class); - /** * Constructs and Returns TaskAttempt Log Url * or null if the TaskLogServlet is not available @@ -125,148 +118,6 @@ MiniDFSShim getMiniDfs(Configuration conf, CombineFileInputFormatShim getCombineFileInputFormat(); - String getInputFormatClassName(); - - int createHadoopArchive(Configuration conf, Path parentDir, Path destDir, - String archiveName) throws Exception; - - public URI getHarUri(URI original, URI base, URI originalBase) - throws URISyntaxException; - /** - * Hive uses side effect files exclusively for its output. It also manages - * the setup/cleanup/commit of output from the hive client. As a result it does - * not need support for the same inside the MR framework - * - * This routine sets the appropriate options related to bypass setup/cleanup/commit - * support in the MR framework, but does not set the OutputFormat class. - */ - void prepareJobOutput(JobConf conf); - - /** - * Used by TaskLogProcessor to remove HTML quoting from a string - * @param item the string to unquote - * @return the unquoted string - * - */ - public String unquoteHtmlChars(String item); - - - - public void closeAllForUGI(UserGroupInformation ugi); - - /** - * Get the UGI that the given job configuration will run as. - * - * In secure versions of Hadoop, this simply returns the current - * access control context's user, ignoring the configuration. - */ - public UserGroupInformation getUGIForConf(Configuration conf) throws LoginException, IOException; - - /** - * Used by metastore server to perform requested rpc in client context.
- * @param - * @param ugi - * @param pvea - * @throws IOException - * @throws InterruptedException - */ - public T doAs(UserGroupInformation ugi, PrivilegedExceptionAction pvea) throws - IOException, InterruptedException; - - /** - * Once a delegation token is stored in a file, the location is specified - * for a child process that runs hadoop operations, using an environment - * variable. - * @return Return the name of environment variable used by hadoop to find - * location of token file - */ - public String getTokenFileLocEnvName(); - - - /** - * Get delegation token from filesystem and write the token along with - * metastore tokens into a file - * @param conf - * @return Path of the file with token credential - * @throws IOException - */ - public Path createDelegationTokenFile(final Configuration conf) throws IOException; - - - /** - * Used to create a UGI object for a remote user. - * @param userName remote User Name - * @param groupNames group names associated with remote user name - * @return UGI created for the remote user. - */ - public UserGroupInformation createRemoteUser(String userName, List groupNames); - - /** - * Get the short name corresponding to the subject in the passed UGI - * - * In secure versions of Hadoop, this returns the short name (after - * undergoing the translation in the kerberos name rule mapping). - * In unsecure versions of Hadoop, this returns the name of the subject - */ - public String getShortUserName(UserGroupInformation ugi); - - /** - * Return true if the Shim is based on Hadoop Security APIs. - */ - public boolean isSecureShimImpl(); - - /** - * Return true if the hadoop configuration has security enabled - * @return - */ - public boolean isSecurityEnabled(); - - /** - * Get the string form of the token given a token signature. - * The signature is used as the value of the "service" field in the token for lookup. - * Ref: AbstractDelegationTokenSelector in Hadoop. If there exists such a token - * in the token cache (credential store) of the job, the lookup returns that. - * This is relevant only when running against a "secure" hadoop release - * The method gets hold of the tokens if they are set up by hadoop - this should - * happen on the map/reduce tasks if the client added the tokens into hadoop's - * credential store in the front end during job submission.
The method will - * select the hive delegation token among the set of tokens and return the string - * form of it - * @param tokenSignature - * @return the string form of the token found - * @throws IOException - */ - public String getTokenStrForm(String tokenSignature) throws IOException; - - /** - * Dynamically sets up the JAAS configuration that uses kerberos - * @param principal - * @param keyTabFile - * @throws IOException - */ - public void setZookeeperClientKerberosJaasConfig(String principal, String keyTabFile) - throws IOException; - - /** - * Add a delegation token to the given ugi - * @param ugi - * @param tokenStr - * @param tokenService - * @throws IOException - */ - public void setTokenStr(UserGroupInformation ugi, String tokenStr, String tokenService) - throws IOException; - - /** - * Add given service to the string format token - * @param tokenStr - * @param tokenService - * @return - * @throws IOException - */ - public String addServiceToToken(String tokenStr, String tokenService) - throws IOException; - enum JobTrackerState { INITIALIZING, RUNNING }; /** @@ -315,43 +166,6 @@ public String addServiceToToken(String tokenStr, String tokenService) */ public String getJobLauncherHttpAddress(Configuration conf); - - /** - * Perform kerberos login using the given principal and keytab - * @throws IOException - */ - public void loginUserFromKeytab(String principal, String keytabFile) throws IOException; - - /** - * Perform kerberos login using the given principal and keytab, - * and return the UGI object - * @throws IOException - */ - public UserGroupInformation loginUserFromKeytabAndReturnUGI(String principal, - String keytabFile) throws IOException; - - /** - * Convert Kerberos principal name pattern to valid Kerberos principal names. - * @param principal (principal name pattern) - * @return - * @throws IOException - */ - public String getResolvedPrincipal(String principal) throws IOException; - - /** - * Perform kerberos re-login using the given principal and keytab, to renew - * the credentials - * @throws IOException - */ - public void reLoginUserFromKeytab() throws IOException; - - /*** - * Check if the current UGI is keytab based - * @return - * @throws IOException - */ - public boolean isLoginKeytabBased() throws IOException; - /** * Move the directory/file to trash. In case of the symlinks or mount points, the file is * moved to the trashbin in the actual volume of the path p being deleted @@ -392,20 +206,6 @@ public void refreshDefaultQueue(Configuration conf, String userName) throws IOException; /** - * Create the proxy ugi for the given userid - * @param userName - * @return - */ - public UserGroupInformation createProxyUser(String userName) throws IOException; - - /** - * Verify proxy access to given UGI for given user - * @param ugi - */ - public void authorizeProxyAccess(String proxyUser, UserGroupInformation realUserUgi, - String ipAddress, Configuration conf) throws IOException; - - /** * The method to set the partition file has a different signature between * hadoop versions. * @param jobConf */ @@ -416,53 +216,6 @@ public void authorizeProxyAccess(String proxyUser, UserGroupInformation realUser Comparator getLongComparator(); /** - * InputSplitShim. - * - */ - public interface InputSplitShim extends InputSplit { - JobConf getJob(); - - @Override - long getLength(); - - /** Returns an array containing the start offsets of the files in the split. */ - long[] getStartOffsets(); - - /** Returns an array containing the lengths of the files in the split.
*/ - long[] getLengths(); - - /** Returns the start offset of the ith Path. */ - long getOffset(int i); - - /** Returns the length of the ith Path. */ - long getLength(int i); - - /** Returns the number of Paths in the split. */ - int getNumPaths(); - - /** Returns the ith Path. */ - Path getPath(int i); - - /** Returns all the Paths in the split. */ - Path[] getPaths(); - - /** Returns all the Paths where this input-split resides. */ - @Override - String[] getLocations() throws IOException; - - void shrinkSplit(long length); - - @Override - String toString(); - - @Override - void readFields(DataInput in) throws IOException; - - @Override - void write(DataOutput out) throws IOException; - } - - /** * CombineFileInputFormatShim. * * @param @@ -473,11 +226,11 @@ public void authorizeProxyAccess(String proxyUser, UserGroupInformation realUser void createPool(JobConf conf, PathFilter... filters); - InputSplitShim[] getSplits(JobConf job, int numSplits) throws IOException; + CombineFileSplit[] getSplits(JobConf job, int numSplits) throws IOException; - InputSplitShim getInputSplitShim() throws IOException; + CombineFileSplit getInputSplitShim() throws IOException; - RecordReader getRecordReader(JobConf job, InputSplitShim split, Reporter reporter, + RecordReader getRecordReader(JobConf job, CombineFileSplit split, Reporter reporter, Class> rrClass) throws IOException; } @@ -708,6 +461,13 @@ public void setFullFileStatus(Configuration conf, HdfsFileStatus sourceStatus, */ public Configuration getConfiguration(JobContext context); + /** + * Get job conf from the old style JobContext. + * @param context job context + * @return job conf + */ + public JobConf getJobConf(org.apache.hadoop.mapred.JobContext context); + public FileSystem getNonCachedFileSystem(URI uri, Configuration conf) throws IOException; public void getMergedCredentials(JobConf jobConf) throws IOException; diff --git a/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java b/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java new file mode 100644 index 0000000..bf196ae --- /dev/null +++ b/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java @@ -0,0 +1,441 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.shims; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.lang.reflect.Constructor; +import java.net.URI; +import java.security.AccessControlException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.apache.commons.lang.ArrayUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.DefaultFileAccess; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FsShell; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathFilter; +import org.apache.hadoop.fs.permission.FsAction; +import org.apache.hadoop.hive.io.HiveIOExceptionHandlerUtil; +import org.apache.hadoop.mapred.ClusterStatus; +import org.apache.hadoop.mapred.FileInputFormat; +import org.apache.hadoop.mapred.InputSplit; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.RecordReader; +import org.apache.hadoop.mapred.Reporter; +import org.apache.hadoop.mapred.lib.CombineFileInputFormat; +import org.apache.hadoop.mapred.lib.CombineFileSplit; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.util.Progressable; + +import com.google.common.primitives.Longs; + +/** + * Base implementation for shims against secure Hadoop 0.20.3/0.23. + */ +public abstract class HadoopShimsSecure implements HadoopShims { + + static final Log LOG = LogFactory.getLog(HadoopShimsSecure.class); + + @Override + public HadoopShims.CombineFileInputFormatShim getCombineFileInputFormat() { + return new CombineFileInputFormatShim() { + @Override + public RecordReader getRecordReader(InputSplit split, + JobConf job, Reporter reporter) throws IOException { + throw new IOException("CombineFileInputFormat.getRecordReader not needed."); + } + }; + } + + public static class InputSplitShim extends CombineFileSplit { + long shrinkedLength; + boolean _isShrinked; + public InputSplitShim() { + super(); + _isShrinked = false; + } + + public InputSplitShim(JobConf conf, Path[] paths, long[] startOffsets, + long[] lengths, String[] locations) throws IOException { + super(conf, paths, startOffsets, lengths, dedup(locations)); + _isShrinked = false; + } + + public void shrinkSplit(long length) { + _isShrinked = true; + shrinkedLength = length; + } + + public boolean isShrinked() { + return _isShrinked; + } + + public long getShrinkedLength() { + return shrinkedLength; + } + + @Override + public void readFields(DataInput in) throws IOException { + super.readFields(in); + _isShrinked = in.readBoolean(); + if (_isShrinked) { + shrinkedLength = in.readLong(); + } + } + + @Override + public void write(DataOutput out) throws IOException { + super.write(out); + out.writeBoolean(_isShrinked); + if (_isShrinked) { + out.writeLong(shrinkedLength); + } + } + } + + /* This class should be replaced with org.apache.hadoop.mapred.lib.CombineFileRecordReader class, once + * https://issues.apache.org/jira/browse/MAPREDUCE-955 is fixed.
This code should be removed - it is a copy + * of org.apache.hadoop.mapred.lib.CombineFileRecordReader + */ + public static class CombineFileRecordReader implements RecordReader { + + static final Class[] constructorSignature = new Class[] { + InputSplit.class, + Configuration.class, + Reporter.class, + Integer.class + }; + + protected CombineFileSplit split; + protected JobConf jc; + protected Reporter reporter; + protected Class> rrClass; + protected Constructor> rrConstructor; + protected FileSystem fs; + + protected int idx; + protected long progress; + protected RecordReader curReader; + protected boolean isShrinked; + protected long shrinkedLength; + + @Override + public boolean next(K key, V value) throws IOException { + + while ((curReader == null) + || !doNextWithExceptionHandler((K) ((CombineHiveKey) key).getKey(), + value)) { + if (!initNextRecordReader(key)) { + return false; + } + } + return true; + } + + @Override + public K createKey() { + K newKey = curReader.createKey(); + return (K)(new CombineHiveKey(newKey)); + } + + @Override + public V createValue() { + return curReader.createValue(); + } + + /** + * Return the amount of data processed. + */ + @Override + public long getPos() throws IOException { + return progress; + } + + @Override + public void close() throws IOException { + if (curReader != null) { + curReader.close(); + curReader = null; + } + } + + /** + * Return progress based on the amount of data processed so far. + */ + @Override + public float getProgress() throws IOException { + return Math.min(1.0f, progress / (float) (split.getLength())); + } + + /** + * A generic RecordReader that can hand out different recordReaders + * for each chunk in the CombineFileSplit. + */ + public CombineFileRecordReader(JobConf job, CombineFileSplit split, + Reporter reporter, + Class> rrClass) + throws IOException { + this.split = split; + this.jc = job; + this.rrClass = rrClass; + this.reporter = reporter; + this.idx = 0; + this.curReader = null; + this.progress = 0; + + isShrinked = false; + + assert (split instanceof InputSplitShim); + if (((InputSplitShim) split).isShrinked()) { + isShrinked = true; + shrinkedLength = ((InputSplitShim) split).getShrinkedLength(); + } + + try { + rrConstructor = rrClass.getDeclaredConstructor(constructorSignature); + rrConstructor.setAccessible(true); + } catch (Exception e) { + throw new RuntimeException(rrClass.getName() + + " does not have a valid constructor", e); + } + initNextRecordReader(null); + } + + /** + * Do next and handle exceptions inside it. + * @param key + * @param value + * @return + * @throws IOException + */ + private boolean doNextWithExceptionHandler(K key, V value) throws IOException { + try { + return curReader.next(key, value); + } catch (Exception e) { + return HiveIOExceptionHandlerUtil + .handleRecordReaderNextException(e, jc); + } + } + + /** + * Get the record reader for the next chunk in this CombineFileSplit. + */ + protected boolean initNextRecordReader(K key) throws IOException { + + if (curReader != null) { + curReader.close(); + curReader = null; + if (idx > 0) { + progress += split.getLength(idx - 1); // done processing so far + } + } + + // if all chunks have been processed, nothing more to do.
+ if (idx == split.getNumPaths() || (isShrinked && progress > shrinkedLength)) { + return false; + } + + // get a record reader for the idx-th chunk + try { + curReader = rrConstructor.newInstance(new Object[] + {split, jc, reporter, Integer.valueOf(idx)}); + + // change the key if need be + if (key != null) { + K newKey = curReader.createKey(); + ((CombineHiveKey)key).setKey(newKey); + } + + // setup some helper config variables. + jc.set("map.input.file", split.getPath(idx).toString()); + jc.setLong("map.input.start", split.getOffset(idx)); + jc.setLong("map.input.length", split.getLength(idx)); + } catch (Exception e) { + curReader = HiveIOExceptionHandlerUtil.handleRecordReaderCreationException( + e, jc); + } + idx++; + return true; + } + } + + public abstract static class CombineFileInputFormatShim extends + CombineFileInputFormat + implements HadoopShims.CombineFileInputFormatShim { + + @Override + public Path[] getInputPathsShim(JobConf conf) { + try { + return FileInputFormat.getInputPaths(conf); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Override + public void createPool(JobConf conf, PathFilter... filters) { + super.createPool(conf, filters); + } + + @Override + public CombineFileSplit[] getSplits(JobConf job, int numSplits) throws IOException { + long minSize = job.getLong(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMINSPLITSIZE"), 0); + + // For backward compatibility, let the above parameter be used + if (job.getLong(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMINSPLITSIZEPERNODE"), 0) == 0) { + super.setMinSplitSizeNode(minSize); + } + + if (job.getLong(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMINSPLITSIZEPERRACK"), 0) == 0) { + super.setMinSplitSizeRack(minSize); + } + + if (job.getLong(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMAXSPLITSIZE"), 0) == 0) { + super.setMaxSplitSize(minSize); + } + + InputSplit[] splits = super.getSplits(job, numSplits); + + ArrayList inputSplitShims = new ArrayList(); + for (int pos = 0; pos < splits.length; pos++) { + CombineFileSplit split = (CombineFileSplit) splits[pos]; + Set dirIndices = getDirIndices(split.getPaths(), job); + if (dirIndices.size() != split.getPaths().length) { + List prunedPaths = prune(dirIndices, Arrays.asList(split.getPaths())); + List prunedStartOffsets = prune(dirIndices, Arrays.asList( + ArrayUtils.toObject(split.getStartOffsets()))); + List prunedLengths = prune(dirIndices, Arrays.asList( + ArrayUtils.toObject(split.getLengths()))); + inputSplitShims.add(new InputSplitShim(job, prunedPaths.toArray(new Path[prunedPaths.size()]), + Longs.toArray(prunedStartOffsets), + Longs.toArray(prunedLengths), split.getLocations())); + } + } + return inputSplitShims.toArray(new InputSplitShim[inputSplitShims.size()]); + } + + @Override + public InputSplitShim getInputSplitShim() throws IOException { + return new InputSplitShim(); + } + + @Override + public RecordReader getRecordReader(JobConf job, CombineFileSplit split, + Reporter reporter, + Class> rrClass) + throws IOException { + CombineFileSplit cfSplit = split; + return new CombineFileRecordReader(job, cfSplit, reporter, rrClass); + } + + } + + @Override + abstract public JobTrackerState getJobTrackerState(ClusterStatus clusterStatus) throws Exception; + + @Override + abstract public org.apache.hadoop.mapreduce.TaskAttemptContext newTaskAttemptContext( + Configuration conf, final Progressable progressable); + + @Override + abstract public org.apache.hadoop.mapreduce.JobContext 
newJobContext(Job job); + + @Override + abstract public boolean isLocalMode(Configuration conf); + + @Override + abstract public void setJobLauncherRpcAddress(Configuration conf, String val); + + @Override + abstract public String getJobLauncherHttpAddress(Configuration conf); + + @Override + abstract public String getJobLauncherRpcAddress(Configuration conf); + + @Override + abstract public short getDefaultReplication(FileSystem fs, Path path); + + @Override + abstract public long getDefaultBlockSize(FileSystem fs, Path path); + + @Override + abstract public boolean moveToAppropriateTrash(FileSystem fs, Path path, Configuration conf) + throws IOException; + + @Override + abstract public FileSystem createProxyFileSystem(FileSystem fs, URI uri); + + @Override + abstract public FileSystem getNonCachedFileSystem(URI uri, Configuration conf) throws IOException; + + protected void run(FsShell shell, String[] command) throws Exception { + LOG.debug(ArrayUtils.toString(command)); + int retval = shell.run(command); + LOG.debug("Return value is: " + retval); + } + + /** + * CombineFileInputFormat sometimes returns directories as splits, need to prune them. + */ + private static Set getDirIndices(Path[] paths, JobConf conf) throws IOException { + Set result = new HashSet(); + for (int i = 0; i < paths.length; i++) { + FileSystem fs = paths[i].getFileSystem(conf); + if (!fs.isFile(paths[i])) { + result.add(i); + } + } + return result; + } + + private static List prune(Set indicesToPrune, List elms) { + List result = new ArrayList(); + int i = 0; + for (K elm : elms) { + // the index must advance on pruned elements too, or later indices shift + if (!indicesToPrune.contains(i)) { + result.add(elm); + } + i++; + } + return result; + } + + private static String[] dedup(String[] locations) throws IOException { + Set dedup = new HashSet(); + Collections.addAll(dedup, locations); + return dedup.toArray(new String[dedup.size()]); + } + + @Override + public void checkFileAccess(FileSystem fs, FileStatus stat, FsAction action) + throws IOException, AccessControlException, Exception { + DefaultFileAccess.checkFileAccess(fs, stat, action); + } +} diff --git a/shims/common/src/main/java/org/apache/hadoop/hive/shims/ShimLoader.java b/shims/common/src/main/java/org/apache/hadoop/hive/shims/ShimLoader.java index b84f1f2..f19e3c6 100644 --- a/shims/common/src/main/java/org/apache/hadoop/hive/shims/ShimLoader.java +++ b/shims/common/src/main/java/org/apache/hadoop/hive/shims/ShimLoader.java @@ -20,7 +20,6 @@ import java.util.HashMap; import java.util.Map; -import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge; import org.apache.hadoop.util.VersionInfo; import org.apache.log4j.AppenderSkeleton; @@ -43,7 +42,6 @@ new HashMap(); static { - HADOOP_SHIM_CLASSES.put("0.20", "org.apache.hadoop.hive.shims.Hadoop20Shims"); HADOOP_SHIM_CLASSES.put("0.20S", "org.apache.hadoop.hive.shims.Hadoop20SShims"); HADOOP_SHIM_CLASSES.put("0.23", "org.apache.hadoop.hive.shims.Hadoop23Shims"); } @@ -56,7 +54,6 @@ new HashMap(); static { - JETTY_SHIM_CLASSES.put("0.20", "org.apache.hadoop.hive.shims.Jetty20Shims"); JETTY_SHIM_CLASSES.put("0.20S", "org.apache.hadoop.hive.shims.Jetty20SShims"); JETTY_SHIM_CLASSES.put("0.23", "org.apache.hadoop.hive.shims.Jetty23Shims"); } @@ -68,7 +65,6 @@ new HashMap(); static { - EVENT_COUNTER_SHIM_CLASSES.put("0.20", "org.apache.hadoop.metrics.jvm.EventCounter"); EVENT_COUNTER_SHIM_CLASSES.put("0.20S", "org.apache.hadoop.log.metrics.EventCounter"); EVENT_COUNTER_SHIM_CLASSES.put("0.23",
"org.apache.hadoop.log.metrics.EventCounter"); } @@ -80,10 +76,8 @@ new HashMap(); static { - HADOOP_THRIFT_AUTH_BRIDGE_CLASSES.put("0.20", - "org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge"); HADOOP_THRIFT_AUTH_BRIDGE_CLASSES.put("0.20S", - "org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge20S"); + "org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge"); HADOOP_THRIFT_AUTH_BRIDGE_CLASSES.put("0.23", "org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge23"); } @@ -153,9 +147,7 @@ public static synchronized SchedulerShim getSchedulerShims() { /** * Return the "major" version of Hadoop currently on the classpath. - * For releases in the 0.x series this is simply the first two - * components of the version, e.g. "0.20" or "0.23". Releases in - * the 1.x and 2.x series are mapped to the appropriate + * Releases in the 1.x and 2.x series are mapped to the appropriate * 0.x release series, e.g. 1.x is mapped to "0.20S" and 2.x * is mapped to "0.23". */ @@ -168,10 +160,7 @@ public static String getMajorVersion() { " (expected A.B.* format)"); } - // Special handling for Hadoop 1.x and 2.x switch (Integer.parseInt(parts[0])) { - case 0: - break; case 1: return "0.20S"; case 2: @@ -179,19 +168,6 @@ public static String getMajorVersion() { default: throw new IllegalArgumentException("Unrecognized Hadoop major version number: " + vers); } - - String majorVersion = parts[0] + "." + parts[1]; - - // If we are running a security release, we won't have UnixUserGroupInformation - // (removed by HADOOP-6299 when switching to JAAS for Login) - try { - Class.forName("org.apache.hadoop.security.UnixUserGroupInformation"); - } catch (ClassNotFoundException cnf) { - if ("0.20".equals(majorVersion)) { - majorVersion += "S"; - } - } - return majorVersion; } private ShimLoader() { diff --git a/shims/common/src/main/java/org/apache/hadoop/hive/shims/Utils.java b/shims/common/src/main/java/org/apache/hadoop/hive/shims/Utils.java new file mode 100644 index 0000000..2648d1d --- /dev/null +++ b/shims/common/src/main/java/org/apache/hadoop/hive/shims/Utils.java @@ -0,0 +1,182 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.shims; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +import javax.security.auth.login.AppConfigurationEntry; +import javax.security.auth.login.LoginException; +import javax.security.auth.login.AppConfigurationEntry.LoginModuleControlFlag; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.thrift.DelegationTokenIdentifier; +import org.apache.hadoop.hive.thrift.DelegationTokenSelector; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.security.SecurityUtil; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.security.authentication.util.KerberosUtil; +import org.apache.hadoop.security.token.Token; +import org.apache.hadoop.security.token.TokenIdentifier; +import org.apache.hadoop.security.token.TokenSelector; +import org.apache.zookeeper.client.ZooKeeperSaslClient; + +public class Utils { + + public static UserGroupInformation getUGIForConf(Configuration conf) throws LoginException, IOException { + String doAs = System.getenv("HADOOP_USER_NAME"); + if(doAs != null && doAs.length() > 0) { + /* + * this allows doAs (proxy user) to be passed along across process boundaries where + * delegation tokens are not supported. For example, a DDL stmt via WebHCat with + * a doAs parameter forks to 'hcat', which needs to start a Session that + * proxies the end user + */ + return UserGroupInformation.createProxyUser(doAs, UserGroupInformation.getLoginUser()); + } + return UserGroupInformation.getCurrentUser(); + } + + /** + * Get the string form of the token given a token signature. + * The signature is used as the value of the "service" field in the token for lookup. + * Ref: AbstractDelegationTokenSelector in Hadoop. If there exists such a token + * in the token cache (credential store) of the job, the lookup returns that. + * This is relevant only when running against a "secure" hadoop release + * The method gets hold of the tokens if they are set up by hadoop - this should + * happen on the map/reduce tasks if the client added the tokens into hadoop's + * credential store in the front end during job submission. The method will + * select the hive delegation token among the set of tokens and return the string + * form of it + * @param tokenSignature + * @return the string form of the token found + * @throws IOException + */ + public static String getTokenStrForm(String tokenSignature) throws IOException { + UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); + TokenSelector tokenSelector = new DelegationTokenSelector(); + + Token token = tokenSelector.selectToken( + tokenSignature == null ? new Text() : new Text(tokenSignature), ugi.getTokens()); + return token != null ? token.encodeToUrlString() : null; + } + + /** + * Create a delegation token object for the given token string and service. + * Add the token to given UGI + * @param ugi + * @param tokenStr + * @param tokenService + * @throws IOException + */ + public static void setTokenStr(UserGroupInformation ugi, String tokenStr, String tokenService) + throws IOException { + Token delegationToken = createToken(tokenStr, tokenService); + ugi.addToken(delegationToken); + } + + /** + * Add a given service to delegation token string.
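What the createToken/setService pair above amounts to, as a small helper (illustration only; the method name and sample service are invented):

import java.io.IOException;
import org.apache.hadoop.hive.thrift.DelegationTokenIdentifier;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.security.token.Token;

public class TokenStrDemo {
  // decode an encoded token string, point it at a new service, re-encode it
  public static String retarget(String tokenStr, String newService) throws IOException {
    Token<DelegationTokenIdentifier> t = new Token<DelegationTokenIdentifier>();
    t.decodeFromUrlString(tokenStr);      // parse the URL-safe encoded form
    t.setService(new Text(newService));   // e.g. "hiveserver2ClientToken" (illustrative)
    return t.encodeToUrlString();         // string form clients can pass around
  }
}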
+ * @param tokenStr + * @param tokenService + * @return + * @throws IOException + */ + public static String addServiceToToken(String tokenStr, String tokenService) + throws IOException { + Token delegationToken = createToken(tokenStr, tokenService); + return delegationToken.encodeToUrlString(); + } + + /** + * Create a new token using the given string and service + * @param tokenStr + * @param tokenService + * @return + * @throws IOException + */ + private static Token createToken(String tokenStr, String tokenService) + throws IOException { + Token delegationToken = new Token(); + delegationToken.decodeFromUrlString(tokenStr); + delegationToken.setService(new Text(tokenService)); + return delegationToken; + } + + /** + * Dynamically sets up the JAAS configuration that uses kerberos + * @param principal + * @param keyTabFile + * @throws IOException + */ + public static void setZookeeperClientKerberosJaasConfig(String principal, String keyTabFile) throws IOException { + // ZooKeeper property name to pick the correct JAAS conf section + final String SASL_LOGIN_CONTEXT_NAME = "HiveZooKeeperClient"; + System.setProperty(ZooKeeperSaslClient.LOGIN_CONTEXT_NAME_KEY, SASL_LOGIN_CONTEXT_NAME); + + principal = SecurityUtil.getServerPrincipal(principal, "0.0.0.0"); + JaasConfiguration jaasConf = new JaasConfiguration(SASL_LOGIN_CONTEXT_NAME, principal, keyTabFile); + + // Install the Configuration in the runtime. + javax.security.auth.login.Configuration.setConfiguration(jaasConf); + } + + /** + * A JAAS configuration for ZooKeeper clients intended for use with SASL + * Kerberos. + */ + private static class JaasConfiguration extends javax.security.auth.login.Configuration { + // Currently installed Configuration + private final javax.security.auth.login.Configuration baseConfig = javax.security.auth.login.Configuration + .getConfiguration(); + private final String loginContextName; + private final String principal; + private final String keyTabFile; + + public JaasConfiguration(String hiveLoginContextName, String principal, String keyTabFile) { + this.loginContextName = hiveLoginContextName; + this.principal = principal; + this.keyTabFile = keyTabFile; + } + + @Override + public AppConfigurationEntry[] getAppConfigurationEntry(String appName) { + if (loginContextName.equals(appName)) { + Map krbOptions = new HashMap(); + krbOptions.put("doNotPrompt", "true"); + krbOptions.put("storeKey", "true"); + krbOptions.put("useKeyTab", "true"); + krbOptions.put("principal", principal); + krbOptions.put("keyTab", keyTabFile); + krbOptions.put("refreshKrb5Config", "true"); + AppConfigurationEntry hiveZooKeeperClientEntry = new AppConfigurationEntry( + KerberosUtil.getKrb5LoginModuleName(), LoginModuleControlFlag.REQUIRED, krbOptions); + return new AppConfigurationEntry[] { hiveZooKeeperClientEntry }; + } + // Try the base config + if (baseConfig != null) { + return baseConfig.getAppConfigurationEntry(appName); + } + return null; + } + } + + +} diff --git a/shims/common/src/main/java/org/apache/hadoop/hive/thrift/DBTokenStore.java b/shims/common/src/main/java/org/apache/hadoop/hive/thrift/DBTokenStore.java new file mode 100644 index 0000000..0f56b8c --- /dev/null +++ b/shims/common/src/main/java/org/apache/hadoop/hive/thrift/DBTokenStore.java @@ -0,0 +1,175 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.thrift; + +import java.io.IOException; +import java.lang.reflect.InvocationTargetException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.commons.codec.binary.Base64; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge.Server.ServerMode; +import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager.DelegationTokenInformation; +import org.apache.hadoop.security.token.delegation.HiveDelegationTokenSupport; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class DBTokenStore implements DelegationTokenStore { + private static final Logger LOG = LoggerFactory.getLogger(DBTokenStore.class); + + @Override + public int addMasterKey(String s) throws TokenStoreException { + if (LOG.isTraceEnabled()) { + LOG.trace("addMasterKey: s = " + s); + } + return (Integer)invokeOnRawStore("addMasterKey", new Object[]{s},String.class); + } + + @Override + public void updateMasterKey(int keySeq, String s) throws TokenStoreException { + if (LOG.isTraceEnabled()) { + LOG.trace("updateMasterKey: s = " + s + ", keySeq = " + keySeq); + } + invokeOnRawStore("updateMasterKey", new Object[] {Integer.valueOf(keySeq), s}, + Integer.class, String.class); + } + + @Override + public boolean removeMasterKey(int keySeq) { + return (Boolean)invokeOnRawStore("removeMasterKey", new Object[] {Integer.valueOf(keySeq)}, + Integer.class); + } + + @Override + public String[] getMasterKeys() throws TokenStoreException { + return (String[])invokeOnRawStore("getMasterKeys", new Object[0]); + } + + @Override + public boolean addToken(DelegationTokenIdentifier tokenIdentifier, + DelegationTokenInformation token) throws TokenStoreException { + + try { + String identifier = TokenStoreDelegationTokenSecretManager.encodeWritable(tokenIdentifier); + String tokenStr = Base64.encodeBase64URLSafeString( + HiveDelegationTokenSupport.encodeDelegationTokenInformation(token)); + boolean result = (Boolean)invokeOnRawStore("addToken", new Object[] {identifier, tokenStr}, + String.class, String.class); + if (LOG.isTraceEnabled()) { + LOG.trace("addToken: tokenIdentifier = " + tokenIdentifier + ", added = " + result); + } + return result; + } catch (IOException e) { + throw new TokenStoreException(e); + } + } + + @Override + public DelegationTokenInformation getToken(DelegationTokenIdentifier tokenIdentifier) + throws TokenStoreException { + try { + String tokenStr = (String)invokeOnRawStore("getToken", new Object[] { + TokenStoreDelegationTokenSecretManager.encodeWritable(tokenIdentifier)}, String.class); + DelegationTokenInformation result = null; + if (tokenStr != null) { + result = HiveDelegationTokenSupport.decodeDelegationTokenInformation(Base64.decodeBase64(tokenStr)); + } + if (LOG.isTraceEnabled()) { + LOG.trace("getToken: tokenIdentifier = " + tokenIdentifier + ", result = " + result); + } + return result; + } catch (IOException e) { + throw new TokenStoreException(e); + } + } + + @Override + public boolean removeToken(DelegationTokenIdentifier tokenIdentifier) throws TokenStoreException{ + try { + boolean result = (Boolean)invokeOnRawStore("removeToken", new Object[] { + TokenStoreDelegationTokenSecretManager.encodeWritable(tokenIdentifier)}, String.class); + if (LOG.isTraceEnabled()) { + LOG.trace("removeToken: tokenIdentifier = " + tokenIdentifier + ", removed = " + result); + } + return result; + } catch (IOException e) { + throw new TokenStoreException(e); + } + } + + @Override + public List<DelegationTokenIdentifier> getAllDelegationTokenIdentifiers() throws TokenStoreException{ + + List<String> tokenIdents = (List<String>)invokeOnRawStore("getAllTokenIdentifiers", new Object[0]); + List<DelegationTokenIdentifier> delTokenIdents = new ArrayList<DelegationTokenIdentifier>(tokenIdents.size()); + + for (String tokenIdent : tokenIdents) { + DelegationTokenIdentifier delToken = new DelegationTokenIdentifier(); + try { + TokenStoreDelegationTokenSecretManager.decodeWritable(delToken, tokenIdent); + } catch (IOException e) { + throw new TokenStoreException(e); + } + delTokenIdents.add(delToken); + } + return delTokenIdents; + } + + private Object rawStore; + + @Override + public void init(Object rawStore, ServerMode smode) throws TokenStoreException { + this.rawStore = rawStore; + } + + private Object invokeOnRawStore(String methName, Object[] params, Class<?> ... paramTypes) + throws TokenStoreException{ + + try { + return rawStore.getClass().getMethod(methName, paramTypes).invoke(rawStore, params); + } catch (IllegalArgumentException e) { + throw new TokenStoreException(e); + } catch (SecurityException e) { + throw new TokenStoreException(e); + } catch (IllegalAccessException e) { + throw new TokenStoreException(e); + } catch (InvocationTargetException e) { + throw new TokenStoreException(e.getCause()); + } catch (NoSuchMethodException e) { + throw new TokenStoreException(e); + } + } + + @Override + public void setConf(Configuration conf) { + // No-op + } + + @Override + public Configuration getConf() { + return null; + } + + @Override + public void close() throws IOException { + // No-op. + } + +} diff --git a/shims/common/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenIdentifier.java b/shims/common/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenIdentifier.java new file mode 100644 index 0000000..4ca3c0b --- --- /dev/null +++ b/shims/common/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenIdentifier.java @@ -0,0 +1,52 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.thrift; + +import org.apache.hadoop.io.Text; +import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenIdentifier; + +/** + * A delegation token identifier that is specific to Hive.
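+ * For example (illustrative; {@code ugi} stands for a {@code UserGroupInformation}), a token of + * this kind can be picked out of a credential set by matching on {@link #HIVE_DELEGATION_KIND}: + * <pre>{@code + * for (Token<? extends TokenIdentifier> t : ugi.getTokens()) { + * if (DelegationTokenIdentifier.HIVE_DELEGATION_KIND.equals(t.getKind())) { + * // found the Hive delegation token + * } + * } + * }</pre>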
+ */ +public class DelegationTokenIdentifier + extends AbstractDelegationTokenIdentifier { + public static final Text HIVE_DELEGATION_KIND = new Text("HIVE_DELEGATION_TOKEN"); + + /** + * Create an empty delegation token identifier for reading into. + */ + public DelegationTokenIdentifier() { + } + + /** + * Create a new delegation token identifier + * @param owner the effective username of the token owner + * @param renewer the username of the renewer + * @param realUser the real username of the token owner + */ + public DelegationTokenIdentifier(Text owner, Text renewer, Text realUser) { + super(owner, renewer, realUser); + } + + @Override + public Text getKind() { + return HIVE_DELEGATION_KIND; + } + +} diff --git a/shims/common/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenSecretManager.java b/shims/common/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenSecretManager.java new file mode 100644 index 0000000..19d1fbf --- /dev/null +++ b/shims/common/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenSecretManager.java @@ -0,0 +1,100 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.thrift; + +import java.io.ByteArrayInputStream; +import java.io.DataInputStream; +import java.io.IOException; + +import org.apache.hadoop.io.Text; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.security.token.Token; +import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager; + +/** + * A Hive specific delegation token secret manager. + * The secret manager is responsible for generating and accepting the password + * for each token. + */ +public class DelegationTokenSecretManager + extends AbstractDelegationTokenSecretManager { + + /** + * Create a secret manager + * @param delegationKeyUpdateInterval the number of seconds for rolling new + * secret keys. 
+ * @param delegationTokenMaxLifetime the maximum lifetime of the delegation + * tokens + * @param delegationTokenRenewInterval how often the tokens must be renewed + * @param delegationTokenRemoverScanInterval how often the tokens are scanned + * for expired tokens + */ + public DelegationTokenSecretManager(long delegationKeyUpdateInterval, + long delegationTokenMaxLifetime, + long delegationTokenRenewInterval, + long delegationTokenRemoverScanInterval) { + super(delegationKeyUpdateInterval, delegationTokenMaxLifetime, + delegationTokenRenewInterval, delegationTokenRemoverScanInterval); + } + + @Override + public DelegationTokenIdentifier createIdentifier() { + return new DelegationTokenIdentifier(); + } + + public synchronized void cancelDelegationToken(String tokenStrForm) throws IOException { + Token t= new Token(); + t.decodeFromUrlString(tokenStrForm); + String user = UserGroupInformation.getCurrentUser().getUserName(); + cancelToken(t, user); + } + + public synchronized long renewDelegationToken(String tokenStrForm) throws IOException { + Token t= new Token(); + t.decodeFromUrlString(tokenStrForm); + String user = UserGroupInformation.getCurrentUser().getUserName(); + return renewToken(t, user); + } + + public synchronized String getDelegationToken(String renewer) throws IOException { + UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); + Text owner = new Text(ugi.getUserName()); + Text realUser = null; + if (ugi.getRealUser() != null) { + realUser = new Text(ugi.getRealUser().getUserName()); + } + DelegationTokenIdentifier ident = + new DelegationTokenIdentifier(owner, new Text(renewer), realUser); + Token t = new Token( + ident, this); + return t.encodeToUrlString(); + } + + public String getUserFromToken(String tokenStr) throws IOException { + Token delegationToken = new Token(); + delegationToken.decodeFromUrlString(tokenStr); + + ByteArrayInputStream buf = new ByteArrayInputStream(delegationToken.getIdentifier()); + DataInputStream in = new DataInputStream(buf); + DelegationTokenIdentifier id = createIdentifier(); + id.readFields(in); + return id.getUser().getShortUserName(); + } +} + diff --git a/shims/common/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenSelector.java b/shims/common/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenSelector.java new file mode 100644 index 0000000..f6e2420 --- /dev/null +++ b/shims/common/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenSelector.java @@ -0,0 +1,33 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.thrift; + +import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSelector; + +/** + * A delegation token selector that is specialized for Hive. + */ + +public class DelegationTokenSelector + extends AbstractDelegationTokenSelector<DelegationTokenIdentifier>{ + + public DelegationTokenSelector() { + super(DelegationTokenIdentifier.HIVE_DELEGATION_KIND); + } +} diff --git a/shims/common/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenStore.java b/shims/common/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenStore.java new file mode 100644 index 0000000..867b4ed --- /dev/null +++ b/shims/common/src/main/java/org/apache/hadoop/hive/thrift/DelegationTokenStore.java @@ -0,0 +1,118 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.thrift; + +import java.io.Closeable; +import java.util.List; + +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge.Server.ServerMode; +import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager.DelegationTokenInformation; + +/** + * Interface for a pluggable token store that can be implemented with shared external + * storage for load balancing and high availability (for example using ZooKeeper). + * Internal, store-specific errors are translated into {@link TokenStoreException}. + */ +public interface DelegationTokenStore extends Configurable, Closeable { + + /** + * Exception for internal token store errors that typically cannot be handled by the caller. + */ + public static class TokenStoreException extends RuntimeException { + private static final long serialVersionUID = -8693819817623074083L; + + public TokenStoreException(Throwable cause) { + super(cause); + } + + public TokenStoreException(String message, Throwable cause) { + super(message, cause); + } + } + + /** + * Add new master key. The token store assigns and returns the sequence number. + * Caller needs to use the identifier to update the key (since it is embedded in the key). + * + * @param s + * @return sequence number for new key + */ + int addMasterKey(String s) throws TokenStoreException; + + /** + * Update master key (for expiration and setting the store-assigned sequence within the key). + * @param keySeq + * @param s + * @throws TokenStoreException + */ + void updateMasterKey(int keySeq, String s) throws TokenStoreException; + + /** + * Remove key for given id. + * @param keySeq + * @return false if key no longer present, true otherwise. + */ + boolean removeMasterKey(int keySeq); + + /** + * Return all master keys. + * @return + * @throws TokenStoreException + */ + String[] getMasterKeys() throws TokenStoreException; + + /** + * Add token. If identifier is already present, token won't be added.
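+ * Put-if-absent semantics, sketched (illustrative): + * <pre>{@code + * boolean added = store.addToken(id, info); // false if id was already present + * }</pre>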
+ * @param tokenIdentifier + * @param token + * @return true if token was added, false for existing identifier + */ + boolean addToken(DelegationTokenIdentifier tokenIdentifier, + DelegationTokenInformation token) throws TokenStoreException; + + /** + * Get token. Returns null if the token does not exist. + * @param tokenIdentifier + * @return + */ + DelegationTokenInformation getToken(DelegationTokenIdentifier tokenIdentifier) + throws TokenStoreException; + + /** + * Remove token. Return value can be used by caller to detect concurrency. + * @param tokenIdentifier + * @return true if token was removed, false if it was already removed. + * @throws TokenStoreException + */ + boolean removeToken(DelegationTokenIdentifier tokenIdentifier) throws TokenStoreException; + + /** + * List of all token identifiers in the store. This is used to remove expired tokens + * and a potential scalability improvement would be to partition by master key id + * @return + */ + List getAllDelegationTokenIdentifiers() throws TokenStoreException; + + /** + * @param hmsHandler ObjectStore used by DBTokenStore + * @param smode Indicate whether this is a metastore or hiveserver2 token store + */ + void init(Object hmsHandler, ServerMode smode); + +} diff --git a/shims/common/src/main/java/org/apache/hadoop/hive/thrift/HadoopThriftAuthBridge.java b/shims/common/src/main/java/org/apache/hadoop/hive/thrift/HadoopThriftAuthBridge.java index d011c67..348c419 100644 --- a/shims/common/src/main/java/org/apache/hadoop/hive/thrift/HadoopThriftAuthBridge.java +++ b/shims/common/src/main/java/org/apache/hadoop/hive/thrift/HadoopThriftAuthBridge.java @@ -15,107 +15,726 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.hadoop.hive.thrift; +import static org.apache.hadoop.fs.CommonConfigurationKeys.HADOOP_SECURITY_AUTHENTICATION; + import java.io.IOException; import java.net.InetAddress; +import java.net.Socket; +import java.security.PrivilegedAction; +import java.security.PrivilegedExceptionAction; +import java.util.Locale; import java.util.Map; +import javax.security.auth.callback.Callback; +import javax.security.auth.callback.CallbackHandler; +import javax.security.auth.callback.NameCallback; +import javax.security.auth.callback.PasswordCallback; +import javax.security.auth.callback.UnsupportedCallbackException; +import javax.security.sasl.AuthorizeCallback; +import javax.security.sasl.RealmCallback; +import javax.security.sasl.RealmChoiceCallback; +import javax.security.sasl.SaslException; +import javax.security.sasl.SaslServer; + +import org.apache.commons.codec.binary.Base64; +import org.apache.commons.lang.StringUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.Utils; +import org.apache.hadoop.hive.thrift.client.TUGIAssumingTransport; +import org.apache.hadoop.security.SaslRpcServer; +import org.apache.hadoop.security.SaslRpcServer.AuthMethod; +import org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod; +import org.apache.hadoop.security.authorize.AuthorizationException; +import org.apache.hadoop.security.authorize.ProxyUsers; +import org.apache.hadoop.security.token.SecretManager.InvalidToken; +import 
org.apache.hadoop.security.token.Token; +import org.apache.hadoop.security.token.TokenIdentifier; +import org.apache.hadoop.util.ReflectionUtils; +import org.apache.thrift.TException; import org.apache.thrift.TProcessor; +import org.apache.thrift.protocol.TProtocol; +import org.apache.thrift.transport.TSaslClientTransport; +import org.apache.thrift.transport.TSaslServerTransport; +import org.apache.thrift.transport.TSocket; import org.apache.thrift.transport.TTransport; import org.apache.thrift.transport.TTransportException; import org.apache.thrift.transport.TTransportFactory; /** - * This class is only overridden by the secure hadoop shim. It allows - * the Thrift SASL support to bridge to Hadoop's UserGroupInformation - * & DelegationToken infrastructure. + * Functions that bridge Thrift's SASL transports to Hadoop's + * SASL callback handlers and authentication classes. */ public class HadoopThriftAuthBridge { + static final Log LOG = LogFactory.getLog(HadoopThriftAuthBridge.class); + public Client createClient() { - throw new UnsupportedOperationException( - "The current version of Hadoop does not support Authentication"); + return new Client(); } - public Client createClientWithConf(String authType) { - throw new UnsupportedOperationException( - "The current version of Hadoop does not support Authentication"); + public Client createClientWithConf(String authMethod) { + UserGroupInformation ugi; + try { + ugi = UserGroupInformation.getLoginUser(); + } catch(IOException e) { + throw new IllegalStateException("Unable to get current login user: " + e, e); + } + if (loginUserHasCurrentAuthMethod(ugi, authMethod)) { + LOG.debug("Not setting UGI conf as passed-in authMethod of " + authMethod + " = current."); + return new Client(); + } else { + LOG.debug("Setting UGI conf as passed-in authMethod of " + authMethod + " != current."); + Configuration conf = new Configuration(); + conf.set(HADOOP_SECURITY_AUTHENTICATION, authMethod); + UserGroupInformation.setConfiguration(conf); + return new Client(); + } } - public UserGroupInformation getCurrentUGIWithConf(String authType) - throws IOException { - throw new UnsupportedOperationException( - "The current version of Hadoop does not support Authentication"); + public Server createServer(String keytabFile, String principalConf) throws TTransportException { + return new Server(keytabFile, principalConf); } public String getServerPrincipal(String principalConfig, String host) throws IOException { - throw new UnsupportedOperationException( - "The current version of Hadoop does not support Authentication"); + String serverPrincipal = SecurityUtil.getServerPrincipal(principalConfig, host); + String names[] = SaslRpcServer.splitKerberosName(serverPrincipal); + if (names.length != 3) { + throw new IOException( + "Kerberos principal name does NOT have the expected hostname part: " + + serverPrincipal); + } + return serverPrincipal; + } + + + public UserGroupInformation getCurrentUGIWithConf(String authMethod) + throws IOException { + UserGroupInformation ugi; + try { + ugi = UserGroupInformation.getCurrentUser(); + } catch(IOException e) { + throw new IllegalStateException("Unable to get current user: " + e, e); + } + if (loginUserHasCurrentAuthMethod(ugi, authMethod)) { + LOG.debug("Not setting UGI conf as passed-in authMethod of " + authMethod + " = current."); + return ugi; + } else { + LOG.debug("Setting UGI conf as passed-in authMethod of " + authMethod + " != current."); + Configuration conf = new Configuration(); + 
conf.set(HADOOP_SECURITY_AUTHENTICATION, authMethod); + UserGroupInformation.setConfiguration(conf); + return UserGroupInformation.getCurrentUser(); + } + } - public Server createServer(String keytabFile, String principalConf) - throws TTransportException { - throw new UnsupportedOperationException( - "The current version of Hadoop does not support Authentication"); + /** + * Return true if the current login user is already using the given authMethod. + * + * Used above to ensure we do not create a new Configuration object and as such + * lose other settings such as the cluster to which the JVM is connected. Required + * for Oozie since it does not have a core-site.xml; see HIVE-7682. + */ + private boolean loginUserHasCurrentAuthMethod(UserGroupInformation ugi, String sAuthMethod) { + AuthenticationMethod authMethod; + try { + // based on SecurityUtil.getAuthenticationMethod() + authMethod = Enum.valueOf(AuthenticationMethod.class, sAuthMethod.toUpperCase(Locale.ENGLISH)); + } catch (IllegalArgumentException iae) { + throw new IllegalArgumentException("Invalid attribute value for " + + HADOOP_SECURITY_AUTHENTICATION + " of " + sAuthMethod, iae); + } + LOG.debug("Current authMethod = " + ugi.getAuthenticationMethod()); + return ugi.getAuthenticationMethod().equals(authMethod); } /** * Read and return Hadoop SASL configuration which can be configured using * "hadoop.rpc.protection" - * * @param conf * @return Hadoop SASL configuration */ + public Map<String, String> getHadoopSaslProperties(Configuration conf) { - throw new UnsupportedOperationException( - "The current version of Hadoop does not support Authentication"); + // Initialize the SaslRpcServer to ensure QOP parameters are read from conf + SaslRpcServer.init(conf); + return SaslRpcServer.SASL_PROPS; } - public static abstract class Client { + public static class Client { /** + * Create a client-side SASL transport that wraps an underlying transport. * - * @param principalConfig In the case of Kerberos authentication this will - * be the kerberos principal name, for DIGEST-MD5 (delegation token) based - * authentication this will be null - * @param host The metastore server host name - * @param methodStr "KERBEROS" or "DIGEST" - * @param tokenStrForm This is url encoded string form of - * org.apache.hadoop.security.token. - * @param underlyingTransport the underlying transport - * @return the transport - * @throws IOException + * @param principalConfig In the case of Kerberos authentication, the Kerberos principal + * name of the target server; for DIGEST-MD5 (delegation token) authentication this may be null. + * @param host The server host name, used to resolve the principal. + * @param methodStr "KERBEROS" or "DIGEST". + * @param tokenStrForm The URL-encoded string form of the delegation token (DIGEST only). + * @param underlyingTransport The underlying transport mechanism, usually a TSocket. + * @return the configured SASL transport.
+ * @param saslProps the sasl properties to create the client with */ - public abstract TTransport createClientTransport( + + + public TTransport createClientTransport( String principalConfig, String host, String methodStr, String tokenStrForm, TTransport underlyingTransport, - Map saslProps) - throws IOException; + Map saslProps) throws IOException { + AuthMethod method = AuthMethod.valueOf(AuthMethod.class, methodStr); + + TTransport saslTransport = null; + switch (method) { + case DIGEST: + Token t= new Token(); + t.decodeFromUrlString(tokenStrForm); + saslTransport = new TSaslClientTransport( + method.getMechanismName(), + null, + null, SaslRpcServer.SASL_DEFAULT_REALM, + saslProps, new SaslClientCallbackHandler(t), + underlyingTransport); + return new TUGIAssumingTransport(saslTransport, UserGroupInformation.getCurrentUser()); + + case KERBEROS: + String serverPrincipal = SecurityUtil.getServerPrincipal(principalConfig, host); + String names[] = SaslRpcServer.splitKerberosName(serverPrincipal); + if (names.length != 3) { + throw new IOException( + "Kerberos principal name does NOT have the expected hostname part: " + + serverPrincipal); + } + try { + saslTransport = new TSaslClientTransport( + method.getMechanismName(), + null, + names[0], names[1], + saslProps, null, + underlyingTransport); + return new TUGIAssumingTransport(saslTransport, UserGroupInformation.getCurrentUser()); + } catch (SaslException se) { + throw new IOException("Could not instantiate SASL transport", se); + } + + default: + throw new IOException("Unsupported authentication method: " + method); + } + } + private static class SaslClientCallbackHandler implements CallbackHandler { + private final String userName; + private final char[] userPassword; + + public SaslClientCallbackHandler(Token token) { + this.userName = encodeIdentifier(token.getIdentifier()); + this.userPassword = encodePassword(token.getPassword()); + } + + + @Override + public void handle(Callback[] callbacks) + throws UnsupportedCallbackException { + NameCallback nc = null; + PasswordCallback pc = null; + RealmCallback rc = null; + for (Callback callback : callbacks) { + if (callback instanceof RealmChoiceCallback) { + continue; + } else if (callback instanceof NameCallback) { + nc = (NameCallback) callback; + } else if (callback instanceof PasswordCallback) { + pc = (PasswordCallback) callback; + } else if (callback instanceof RealmCallback) { + rc = (RealmCallback) callback; + } else { + throw new UnsupportedCallbackException(callback, + "Unrecognized SASL client callback"); + } + } + if (nc != null) { + if (LOG.isDebugEnabled()) { + LOG.debug("SASL client callback: setting username: " + userName); + } + nc.setName(userName); + } + if (pc != null) { + if (LOG.isDebugEnabled()) { + LOG.debug("SASL client callback: setting userPassword"); + } + pc.setPassword(userPassword); + } + if (rc != null) { + if (LOG.isDebugEnabled()) { + LOG.debug("SASL client callback: setting realm: " + + rc.getDefaultText()); + } + rc.setText(rc.getDefaultText()); + } + } + + static String encodeIdentifier(byte[] identifier) { + return new String(Base64.encodeBase64(identifier)); + } + + static char[] encodePassword(byte[] password) { + return new String(Base64.encodeBase64(password)).toCharArray(); + } + } } - public static abstract class Server { + public static class Server { public enum ServerMode { HIVESERVER2, METASTORE }; - public abstract TTransportFactory createTransportFactory(Map saslProps) throws TTransportException; - public abstract TProcessor 
wrapProcessor(TProcessor processor); - public abstract TProcessor wrapNonAssumingProcessor(TProcessor processor); - public abstract InetAddress getRemoteAddress(); - public abstract void startDelegationTokenSecretManager(Configuration conf, - Object hmsHandler, ServerMode smode) throws IOException; - public abstract String getDelegationToken(String owner, String renewer) - throws IOException, InterruptedException; - public abstract String getDelegationTokenWithService(String owner, String renewer, String service) - throws IOException, InterruptedException; - public abstract String getRemoteUser(); - public abstract long renewDelegationToken(String tokenStrForm) throws IOException; - public abstract void cancelDelegationToken(String tokenStrForm) throws IOException; - public abstract String getUserFromToken(String tokenStr) throws IOException; + final UserGroupInformation realUgi; + DelegationTokenSecretManager secretManager; + private final static long DELEGATION_TOKEN_GC_INTERVAL = 3600000; // 1 hour + //Delegation token related keys + public static final String DELEGATION_KEY_UPDATE_INTERVAL_KEY = + "hive.cluster.delegation.key.update-interval"; + public static final long DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT = + 24*60*60*1000; // 1 day + public static final String DELEGATION_TOKEN_RENEW_INTERVAL_KEY = + "hive.cluster.delegation.token.renew-interval"; + public static final long DELEGATION_TOKEN_RENEW_INTERVAL_DEFAULT = + 24*60*60*1000; // 1 day + public static final String DELEGATION_TOKEN_MAX_LIFETIME_KEY = + "hive.cluster.delegation.token.max-lifetime"; + public static final long DELEGATION_TOKEN_MAX_LIFETIME_DEFAULT = + 7*24*60*60*1000; // 7 days + public static final String DELEGATION_TOKEN_STORE_CLS = + "hive.cluster.delegation.token.store.class"; + public static final String DELEGATION_TOKEN_STORE_ZK_CONNECT_STR = + "hive.cluster.delegation.token.store.zookeeper.connectString"; + // alternate connect string specification configuration + public static final String DELEGATION_TOKEN_STORE_ZK_CONNECT_STR_ALTERNATE = + "hive.zookeeper.quorum"; + + public static final String DELEGATION_TOKEN_STORE_ZK_CONNECT_TIMEOUTMILLIS = + "hive.cluster.delegation.token.store.zookeeper.connectTimeoutMillis"; + public static final String DELEGATION_TOKEN_STORE_ZK_ZNODE = + "hive.cluster.delegation.token.store.zookeeper.znode"; + public static final String DELEGATION_TOKEN_STORE_ZK_ACL = + "hive.cluster.delegation.token.store.zookeeper.acl"; + public static final String DELEGATION_TOKEN_STORE_ZK_ZNODE_DEFAULT = + "/hivedelegation"; + + public Server() throws TTransportException { + try { + realUgi = UserGroupInformation.getCurrentUser(); + } catch (IOException ioe) { + throw new TTransportException(ioe); + } + } + /** + * Create a server with a kerberos keytab/principal. 
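+ * A typical wiring sketch (illustrative; the keytab path and principal are placeholders): + * <pre>{@code + * HadoopThriftAuthBridge.Server server = new HadoopThriftAuthBridge() + * .createServer("/etc/security/keytabs/hive.keytab", "hive/_HOST@EXAMPLE.COM"); + * }</pre>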
+ */ + protected Server(String keytabFile, String principalConf) + throws TTransportException { + if (keytabFile == null || keytabFile.isEmpty()) { + throw new TTransportException("No keytab specified"); + } + if (principalConf == null || principalConf.isEmpty()) { + throw new TTransportException("No principal specified"); + } + + // Login from the keytab + String kerberosName; + try { + kerberosName = + SecurityUtil.getServerPrincipal(principalConf, "0.0.0.0"); + UserGroupInformation.loginUserFromKeytab( + kerberosName, keytabFile); + realUgi = UserGroupInformation.getLoginUser(); + assert realUgi.isFromKeytab(); + } catch (IOException ioe) { + throw new TTransportException(ioe); + } + } + + /** + * Create a TTransportFactory that, upon connection of a client socket, + * negotiates a Kerberized SASL transport. The resulting TTransportFactory + * can be passed as both the input and output transport factory when + * instantiating a TThreadPoolServer, for example. + * + * @param saslProps Map of SASL properties + */ + + public TTransportFactory createTransportFactory(Map saslProps) + throws TTransportException { + // Parse out the kerberos principal, host, realm. + String kerberosName = realUgi.getUserName(); + final String names[] = SaslRpcServer.splitKerberosName(kerberosName); + if (names.length != 3) { + throw new TTransportException("Kerberos principal should have 3 parts: " + kerberosName); + } + + TSaslServerTransport.Factory transFactory = new TSaslServerTransport.Factory(); + transFactory.addServerDefinition( + AuthMethod.KERBEROS.getMechanismName(), + names[0], names[1], // two parts of kerberos principal + saslProps, + new SaslRpcServer.SaslGssCallbackHandler()); + transFactory.addServerDefinition(AuthMethod.DIGEST.getMechanismName(), + null, SaslRpcServer.SASL_DEFAULT_REALM, + saslProps, new SaslDigestCallbackHandler(secretManager)); + + return new TUGIAssumingTransportFactory(transFactory, realUgi); + } + + /** + * Wrap a TProcessor in such a way that, before processing any RPC, it + * assumes the UserGroupInformation of the user authenticated by + * the SASL transport. 
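+ * An illustrative server-side wiring (names are placeholders; {@code server} is this bridge + * Server and {@code rawProcessor} the Thrift processor being secured): + * <pre>{@code + * TTransportFactory transFactory = server.createTransportFactory(saslProps); + * TProcessor processor = server.wrapProcessor(rawProcessor); + * // hand transFactory and processor to e.g. a TThreadPoolServer + * }</pre>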
+ */ + + public TProcessor wrapProcessor(TProcessor processor) { + return new TUGIAssumingProcessor(processor, secretManager, true); + } + + /** + * Wrap a TProcessor to capture the client information like connecting userid, ip etc + */ + + public TProcessor wrapNonAssumingProcessor(TProcessor processor) { + return new TUGIAssumingProcessor(processor, secretManager, false); + } + + protected DelegationTokenStore getTokenStore(Configuration conf) + throws IOException { + String tokenStoreClassName = conf.get(DELEGATION_TOKEN_STORE_CLS, ""); + if (StringUtils.isBlank(tokenStoreClassName)) { + return new MemoryTokenStore(); + } + try { + Class storeClass = Class + .forName(tokenStoreClassName).asSubclass( + DelegationTokenStore.class); + return ReflectionUtils.newInstance(storeClass, conf); + } catch (ClassNotFoundException e) { + throw new IOException("Error initializing delegation token store: " + tokenStoreClassName, + e); + } + } + + + public void startDelegationTokenSecretManager(Configuration conf, Object rawStore, ServerMode smode) + throws IOException{ + long secretKeyInterval = + conf.getLong(DELEGATION_KEY_UPDATE_INTERVAL_KEY, + DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT); + long tokenMaxLifetime = + conf.getLong(DELEGATION_TOKEN_MAX_LIFETIME_KEY, + DELEGATION_TOKEN_MAX_LIFETIME_DEFAULT); + long tokenRenewInterval = + conf.getLong(DELEGATION_TOKEN_RENEW_INTERVAL_KEY, + DELEGATION_TOKEN_RENEW_INTERVAL_DEFAULT); + + DelegationTokenStore dts = getTokenStore(conf); + dts.init(rawStore, smode); + secretManager = new TokenStoreDelegationTokenSecretManager(secretKeyInterval, + tokenMaxLifetime, + tokenRenewInterval, + DELEGATION_TOKEN_GC_INTERVAL, dts); + secretManager.startThreads(); + } + + + public String getDelegationToken(final String owner, final String renewer) + throws IOException, InterruptedException { + if (!authenticationMethod.get().equals(AuthenticationMethod.KERBEROS)) { + throw new AuthorizationException( + "Delegation Token can be issued only with kerberos authentication. " + + "Current AuthenticationMethod: " + authenticationMethod.get() + ); + } + //if the user asking the token is same as the 'owner' then don't do + //any proxy authorization checks. For cases like oozie, where it gets + //a delegation token for another user, we need to make sure oozie is + //authorized to get a delegation token. + //Do all checks on short names + UserGroupInformation currUser = UserGroupInformation.getCurrentUser(); + UserGroupInformation ownerUgi = UserGroupInformation.createRemoteUser(owner); + if (!ownerUgi.getShortUserName().equals(currUser.getShortUserName())) { + //in the case of proxy users, the getCurrentUser will return the + //real user (for e.g. 
oozie) due to the doAs that happened just before the + //server started executing the method getDelegationToken in the MetaStore + ownerUgi = UserGroupInformation.createProxyUser(owner, + UserGroupInformation.getCurrentUser()); + InetAddress remoteAddr = getRemoteAddress(); + ProxyUsers.authorize(ownerUgi, remoteAddr.getHostAddress(), null); + } + return ownerUgi.doAs(new PrivilegedExceptionAction<String>() { + + @Override + public String run() throws IOException { + return secretManager.getDelegationToken(renewer); + } + }); + } + + + public String getDelegationTokenWithService(String owner, String renewer, String service) + throws IOException, InterruptedException { + String token = getDelegationToken(owner, renewer); + return Utils.addServiceToToken(token, service); + } + + + public long renewDelegationToken(String tokenStrForm) throws IOException { + if (!authenticationMethod.get().equals(AuthenticationMethod.KERBEROS)) { + throw new AuthorizationException( + "Delegation Token can be renewed only with kerberos authentication. " + + "Current AuthenticationMethod: " + authenticationMethod.get() + ); + } + return secretManager.renewDelegationToken(tokenStrForm); + } + + + public String getUserFromToken(String tokenStr) throws IOException { + return secretManager.getUserFromToken(tokenStr); + } + + + public void cancelDelegationToken(String tokenStrForm) throws IOException { + secretManager.cancelDelegationToken(tokenStrForm); + } + + final static ThreadLocal<InetAddress> remoteAddress = + new ThreadLocal<InetAddress>() { + + @Override + protected synchronized InetAddress initialValue() { + return null; + } + }; + + + public InetAddress getRemoteAddress() { + return remoteAddress.get(); + } + + final static ThreadLocal<AuthenticationMethod> authenticationMethod = + new ThreadLocal<AuthenticationMethod>() { + + @Override + protected synchronized AuthenticationMethod initialValue() { + return AuthenticationMethod.TOKEN; + } + }; + + private static ThreadLocal<String> remoteUser = new ThreadLocal<String>() { + + @Override + protected synchronized String initialValue() { + return null; + } + }; + + + public String getRemoteUser() { + return remoteUser.get(); + } + + /** CallbackHandler for SASL DIGEST-MD5 mechanism */ + // This code is pretty much completely based on Hadoop's + // SaslRpcServer.SaslDigestCallbackHandler - the only reason we could not + // use that Hadoop class as-is was because it needs a Server.Connection object + // which is relevant in hadoop rpc but not here in the metastore - so the + // code below does not deal with the Server.Connection object.
+ static class SaslDigestCallbackHandler implements CallbackHandler { + private final DelegationTokenSecretManager secretManager; + + public SaslDigestCallbackHandler( + DelegationTokenSecretManager secretManager) { + this.secretManager = secretManager; + } + + private char[] getPassword(DelegationTokenIdentifier tokenid) throws InvalidToken { + return encodePassword(secretManager.retrievePassword(tokenid)); + } + + private char[] encodePassword(byte[] password) { + return new String(Base64.encodeBase64(password)).toCharArray(); + } + /** {@inheritDoc} */ + + @Override + public void handle(Callback[] callbacks) throws InvalidToken, + UnsupportedCallbackException { + NameCallback nc = null; + PasswordCallback pc = null; + AuthorizeCallback ac = null; + for (Callback callback : callbacks) { + if (callback instanceof AuthorizeCallback) { + ac = (AuthorizeCallback) callback; + } else if (callback instanceof NameCallback) { + nc = (NameCallback) callback; + } else if (callback instanceof PasswordCallback) { + pc = (PasswordCallback) callback; + } else if (callback instanceof RealmCallback) { + continue; // realm is ignored + } else { + throw new UnsupportedCallbackException(callback, + "Unrecognized SASL DIGEST-MD5 Callback"); + } + } + if (pc != null) { + DelegationTokenIdentifier tokenIdentifier = SaslRpcServer. + getIdentifier(nc.getDefaultName(), secretManager); + char[] password = getPassword(tokenIdentifier); + + if (LOG.isDebugEnabled()) { + LOG.debug("SASL server DIGEST-MD5 callback: setting password " + + "for client: " + tokenIdentifier.getUser()); + } + pc.setPassword(password); + } + if (ac != null) { + String authid = ac.getAuthenticationID(); + String authzid = ac.getAuthorizationID(); + if (authid.equals(authzid)) { + ac.setAuthorized(true); + } else { + ac.setAuthorized(false); + } + if (ac.isAuthorized()) { + if (LOG.isDebugEnabled()) { + String username = + SaslRpcServer.getIdentifier(authzid, secretManager).getUser().getUserName(); + LOG.debug("SASL server DIGEST-MD5 callback: setting " + + "canonicalized client ID: " + username); + } + ac.setAuthorizedID(authzid); + } + } + } + } + + /** + * Processor that pulls the SaslServer object out of the transport, and + * assumes the remote user's UGI before calling through to the original + * processor. + * + * This is used on the server side to set the UGI for each specific call. 
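+ * In effect (a sketch of the doAs flow, mirroring the code below): + * <pre>{@code + * UserGroupInformation clientUgi = + * UserGroupInformation.createProxyUser(endUser, UserGroupInformation.getLoginUser()); + * clientUgi.doAs(new PrivilegedExceptionAction<Boolean>() { + * public Boolean run() throws Exception { return wrapped.process(inProt, outProt); } + * }); + * }</pre>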
+ */ + protected class TUGIAssumingProcessor implements TProcessor { + final TProcessor wrapped; + DelegationTokenSecretManager secretManager; + boolean useProxy; + TUGIAssumingProcessor(TProcessor wrapped, DelegationTokenSecretManager secretManager, + boolean useProxy) { + this.wrapped = wrapped; + this.secretManager = secretManager; + this.useProxy = useProxy; + } + + + @Override + public boolean process(final TProtocol inProt, final TProtocol outProt) throws TException { + TTransport trans = inProt.getTransport(); + if (!(trans instanceof TSaslServerTransport)) { + throw new TException("Unexpected non-SASL transport " + trans.getClass()); + } + TSaslServerTransport saslTrans = (TSaslServerTransport)trans; + SaslServer saslServer = saslTrans.getSaslServer(); + String authId = saslServer.getAuthorizationID(); + authenticationMethod.set(AuthenticationMethod.KERBEROS); + LOG.debug("AUTH ID ======>" + authId); + String endUser = authId; + + if(saslServer.getMechanismName().equals("DIGEST-MD5")) { + try { + TokenIdentifier tokenId = SaslRpcServer.getIdentifier(authId, + secretManager); + endUser = tokenId.getUser().getUserName(); + authenticationMethod.set(AuthenticationMethod.TOKEN); + } catch (InvalidToken e) { + throw new TException(e.getMessage()); + } + } + Socket socket = ((TSocket)(saslTrans.getUnderlyingTransport())).getSocket(); + remoteAddress.set(socket.getInetAddress()); + UserGroupInformation clientUgi = null; + try { + if (useProxy) { + clientUgi = UserGroupInformation.createProxyUser( + endUser, UserGroupInformation.getLoginUser()); + remoteUser.set(clientUgi.getShortUserName()); + LOG.debug("Set remoteUser :" + remoteUser.get()); + return clientUgi.doAs(new PrivilegedExceptionAction() { + + @Override + public Boolean run() { + try { + return wrapped.process(inProt, outProt); + } catch (TException te) { + throw new RuntimeException(te); + } + } + }); + } else { + // use the short user name for the request + UserGroupInformation endUserUgi = UserGroupInformation.createRemoteUser(endUser); + remoteUser.set(endUserUgi.getShortUserName()); + LOG.debug("Set remoteUser :" + remoteUser.get() + ", from endUser :" + endUser); + return wrapped.process(inProt, outProt); + } + } catch (RuntimeException rte) { + if (rte.getCause() instanceof TException) { + throw (TException)rte.getCause(); + } + throw rte; + } catch (InterruptedException ie) { + throw new RuntimeException(ie); // unexpected! + } catch (IOException ioe) { + throw new RuntimeException(ioe); // unexpected! + } + finally { + if (clientUgi != null) { + try { FileSystem.closeAllForUGI(clientUgi); } + catch(IOException exception) { + LOG.error("Could not clean up file-system handles for UGI: " + clientUgi, exception); + } + } + } + } + } + + /** + * A TransportFactory that wraps another one, but assumes a specified UGI + * before calling through. + * + * This is used on the server side to assume the server's Principal when accepting + * clients. 
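+ * In other words, {@code getTransport(trans)} is evaluated inside {@code ugi.doAs(...)}, so the + * server-side SASL handshake runs as the server principal.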
+ */ + static class TUGIAssumingTransportFactory extends TTransportFactory { + private final UserGroupInformation ugi; + private final TTransportFactory wrapped; + + public TUGIAssumingTransportFactory(TTransportFactory wrapped, UserGroupInformation ugi) { + assert wrapped != null; + assert ugi != null; + this.wrapped = wrapped; + this.ugi = ugi; + } + + + @Override + public TTransport getTransport(final TTransport trans) { + return ugi.doAs(new PrivilegedAction() { + @Override + public TTransport run() { + return wrapped.getTransport(trans); + } + }); + } + } } } - diff --git a/shims/common/src/main/java/org/apache/hadoop/hive/thrift/MemoryTokenStore.java b/shims/common/src/main/java/org/apache/hadoop/hive/thrift/MemoryTokenStore.java new file mode 100644 index 0000000..e982ea7 --- /dev/null +++ b/shims/common/src/main/java/org/apache/hadoop/hive/thrift/MemoryTokenStore.java @@ -0,0 +1,137 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.thrift; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicInteger; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge.Server.ServerMode; +import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager.DelegationTokenInformation; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Default in-memory token store implementation. 
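+ * Tokens and master keys live only in this process: they are not persisted and not shared across + * instances, so this store suits a single-node deployment. Illustrative use (the encoded key is a + * placeholder for a Base64-encoded DelegationKey): + * <pre>{@code + * DelegationTokenStore store = new MemoryTokenStore(); + * int keySeq = store.addMasterKey(encodedKey); + * }</pre>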
+ */ +public class MemoryTokenStore implements DelegationTokenStore { + private static final Logger LOG = LoggerFactory.getLogger(MemoryTokenStore.class); + + private final Map<Integer, String> masterKeys + = new ConcurrentHashMap<Integer, String>(); + + private final ConcurrentHashMap<DelegationTokenIdentifier, DelegationTokenInformation> tokens + = new ConcurrentHashMap<DelegationTokenIdentifier, DelegationTokenInformation>(); + + private final AtomicInteger masterKeySeq = new AtomicInteger(); + private Configuration conf; + + @Override + public void setConf(Configuration conf) { + this.conf = conf; + } + + @Override + public Configuration getConf() { + return this.conf; + } + + @Override + public int addMasterKey(String s) { + int keySeq = masterKeySeq.getAndIncrement(); + if (LOG.isTraceEnabled()) { + LOG.trace("addMasterKey: s = " + s + ", keySeq = " + keySeq); + } + masterKeys.put(keySeq, s); + return keySeq; + } + + @Override + public void updateMasterKey(int keySeq, String s) { + if (LOG.isTraceEnabled()) { + LOG.trace("updateMasterKey: s = " + s + ", keySeq = " + keySeq); + } + masterKeys.put(keySeq, s); + } + + @Override + public boolean removeMasterKey(int keySeq) { + if (LOG.isTraceEnabled()) { + LOG.trace("removeMasterKey: keySeq = " + keySeq); + } + return masterKeys.remove(keySeq) != null; + } + + @Override + public String[] getMasterKeys() { + return masterKeys.values().toArray(new String[0]); + } + + @Override + public boolean addToken(DelegationTokenIdentifier tokenIdentifier, + DelegationTokenInformation token) { + DelegationTokenInformation tokenInfo = tokens.putIfAbsent(tokenIdentifier, token); + if (LOG.isTraceEnabled()) { + LOG.trace("addToken: tokenIdentifier = " + tokenIdentifier + ", added = " + (tokenInfo == null)); + } + return (tokenInfo == null); + } + + @Override + public boolean removeToken(DelegationTokenIdentifier tokenIdentifier) { + DelegationTokenInformation tokenInfo = tokens.remove(tokenIdentifier); + if (LOG.isTraceEnabled()) { + LOG.trace("removeToken: tokenIdentifier = " + tokenIdentifier + ", removed = " + (tokenInfo != null)); + } + return tokenInfo != null; + } + + @Override + public DelegationTokenInformation getToken(DelegationTokenIdentifier tokenIdentifier) { + DelegationTokenInformation result = tokens.get(tokenIdentifier); + if (LOG.isTraceEnabled()) { + LOG.trace("getToken: tokenIdentifier = " + tokenIdentifier + ", result = " + result); + } + return result; + } + + @Override + public List<DelegationTokenIdentifier> getAllDelegationTokenIdentifiers() { + List<DelegationTokenIdentifier> result = new ArrayList<DelegationTokenIdentifier>( + tokens.size()); + for (DelegationTokenIdentifier id : tokens.keySet()) { + result.add(id); + } + return result; + } + + @Override + public void close() throws IOException { + //no-op + } + + @Override + public void init(Object hmsHandler, ServerMode smode) throws TokenStoreException { + // no-op + } +} diff --git a/shims/common/src/main/java/org/apache/hadoop/hive/thrift/TokenStoreDelegationTokenSecretManager.java b/shims/common/src/main/java/org/apache/hadoop/hive/thrift/TokenStoreDelegationTokenSecretManager.java new file mode 100644 index 0000000..8146d51 --- /dev/null +++ b/shims/common/src/main/java/org/apache/hadoop/hive/thrift/TokenStoreDelegationTokenSecretManager.java @@ -0,0 +1,338 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.thrift; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.lang.reflect.Method; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +import org.apache.commons.codec.binary.Base64; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.security.token.Token; +import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager; +import org.apache.hadoop.security.token.delegation.DelegationKey; +import org.apache.hadoop.security.token.delegation.HiveDelegationTokenSupport; +import org.apache.hadoop.util.Daemon; +import org.apache.hadoop.util.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Extension of {@link DelegationTokenSecretManager} to support an alternative to the default + * in-memory token management for fail-over and clustering through a pluggable token store + * (ZooKeeper etc.). + * Delegation tokens will be retrieved from the store on-demand and (unlike base class behavior) not + * cached in memory. This avoids complexities related to token expiration. The security token is + * needed only at the time the transport is opened (as opposed to per interface operation). The + * assumption therefore is low cost of interprocess token retrieval (for a random-read-efficient + * store such as ZooKeeper) compared to the overhead of synchronizing per-process in-memory token + * caches. + * The wrapper incorporates the token store abstraction within the limitations of the current + * Hive/Hadoop dependency (.20S) with minimum code duplication. + * Eventually this should be supported by Hadoop security directly.
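+ * Construction mirrors the base secret manager, with the shared store appended (the interval + * values below are illustrative and mirror the defaults in HadoopThriftAuthBridge.Server): + * <pre>{@code + * DelegationTokenStore store = new MemoryTokenStore(); // or ZooKeeperTokenStore, DBTokenStore + * TokenStoreDelegationTokenSecretManager mgr = new TokenStoreDelegationTokenSecretManager( + * 24*60*60*1000L, 7*24*60*60*1000L, 24*60*60*1000L, 60*60*1000L, store); + * mgr.startThreads(); + * }</pre>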
+ */ +public class TokenStoreDelegationTokenSecretManager extends DelegationTokenSecretManager { + + private static final Logger LOGGER = + LoggerFactory.getLogger(TokenStoreDelegationTokenSecretManager.class.getName()); + + final private long keyUpdateInterval; + final private long tokenRemoverScanInterval; + private Thread tokenRemoverThread; + + final private DelegationTokenStore tokenStore; + + public TokenStoreDelegationTokenSecretManager(long delegationKeyUpdateInterval, + long delegationTokenMaxLifetime, long delegationTokenRenewInterval, + long delegationTokenRemoverScanInterval, + DelegationTokenStore sharedStore) { + super(delegationKeyUpdateInterval, delegationTokenMaxLifetime, delegationTokenRenewInterval, + delegationTokenRemoverScanInterval); + this.keyUpdateInterval = delegationKeyUpdateInterval; + this.tokenRemoverScanInterval = delegationTokenRemoverScanInterval; + + this.tokenStore = sharedStore; + } + + protected DelegationTokenIdentifier getTokenIdentifier(Token<DelegationTokenIdentifier> token) + throws IOException { + // turn bytes back into identifier for cache lookup + ByteArrayInputStream buf = new ByteArrayInputStream(token.getIdentifier()); + DataInputStream in = new DataInputStream(buf); + DelegationTokenIdentifier id = createIdentifier(); + id.readFields(in); + return id; + } + + protected Map<Integer, DelegationKey> reloadKeys() { + // read keys from token store + String[] allKeys = tokenStore.getMasterKeys(); + Map<Integer, DelegationKey> keys + = new HashMap<Integer, DelegationKey>(allKeys.length); + for (String keyStr : allKeys) { + DelegationKey key = new DelegationKey(); + try { + decodeWritable(key, keyStr); + keys.put(key.getKeyId(), key); + } catch (IOException ex) { + LOGGER.error("Failed to load master key.", ex); + } + } + synchronized (this) { + super.allKeys.clear(); + super.allKeys.putAll(keys); + } + return keys; + } + + @Override + public byte[] retrievePassword(DelegationTokenIdentifier identifier) throws InvalidToken { + DelegationTokenInformation info = this.tokenStore.getToken(identifier); + if (info == null) { + throw new InvalidToken("token expired or does not exist: " + identifier); + } + // must reuse super as info.getPassword is not accessible + synchronized (this) { + try { + super.currentTokens.put(identifier, info); + return super.retrievePassword(identifier); + } finally { + super.currentTokens.remove(identifier); + } + } + } + + @Override + public DelegationTokenIdentifier cancelToken(Token<DelegationTokenIdentifier> token, + String canceller) throws IOException { + DelegationTokenIdentifier id = getTokenIdentifier(token); + LOGGER.info("Token cancellation requested for identifier: " + id); + this.tokenStore.removeToken(id); + return id; + } + + /** + * Create the password and add it to the shared store.
+ */ + @Override + protected byte[] createPassword(DelegationTokenIdentifier id) { + byte[] password; + DelegationTokenInformation info; + synchronized (this) { + password = super.createPassword(id); + // add new token to shared store + // need to persist expiration along with password + info = super.currentTokens.remove(id); + if (info == null) { + throw new IllegalStateException("Failed to retrieve token after creation"); + } + } + this.tokenStore.addToken(id, info); + return password; + } + + @Override + public long renewToken(Token<DelegationTokenIdentifier> token, + String renewer) throws InvalidToken, IOException { + // since renewal is KERBEROS-authenticated, the token may not be cached + final DelegationTokenIdentifier id = getTokenIdentifier(token); + DelegationTokenInformation tokenInfo = this.tokenStore.getToken(id); + if (tokenInfo == null) { + throw new InvalidToken("token does not exist: " + id); // no token found + } + // ensure associated master key is available + if (!super.allKeys.containsKey(id.getMasterKeyId())) { + LOGGER.info("Unknown master key (id={}), (re)loading keys from token store.", + id.getMasterKeyId()); + reloadKeys(); + } + // reuse super renewal logic + synchronized (this) { + super.currentTokens.put(id, tokenInfo); + try { + return super.renewToken(token, renewer); + } finally { + super.currentTokens.remove(id); + } + } + } + + public static String encodeWritable(Writable key) throws IOException { + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + DataOutputStream dos = new DataOutputStream(bos); + key.write(dos); + dos.flush(); + return Base64.encodeBase64URLSafeString(bos.toByteArray()); + } + + public static void decodeWritable(Writable w, String idStr) throws IOException { + DataInputStream in = new DataInputStream(new ByteArrayInputStream(Base64.decodeBase64(idStr))); + w.readFields(in); + } + + /** + * Synchronize master key updates / sequence generation for multiple nodes. + * NOTE: {@link AbstractDelegationTokenSecretManager} keeps currentKey private, so we need + * to utilize this "hook" to manipulate the key through the object reference. + * This .20S workaround should cease to exist when Hadoop supports token store.
+  /**
+   * Synchronize master key updates / sequence generation for multiple nodes.
+   * NOTE: {@link AbstractDelegationTokenSecretManager} keeps currentKey private, so we need
+   * to utilize this "hook" to manipulate the key through the object reference.
+   * This .20S workaround should cease to exist when Hadoop supports token store.
+   */
+  @Override
+  protected void logUpdateMasterKey(DelegationKey key) throws IOException {
+    int keySeq = this.tokenStore.addMasterKey(encodeWritable(key));
+    // update key with assigned identifier
+    DelegationKey keyWithSeq = new DelegationKey(keySeq, key.getExpiryDate(), key.getKey());
+    String keyStr = encodeWritable(keyWithSeq);
+    this.tokenStore.updateMasterKey(keySeq, keyStr);
+    decodeWritable(key, keyStr);
+    LOGGER.info("New master key with key id={}", key.getKeyId());
+    super.logUpdateMasterKey(key);
+  }
+
+  @Override
+  public synchronized void startThreads() throws IOException {
+    try {
+      // updateCurrentKey needs to be called to initialize the master key
+      // (there should be a null check added in the future in rollMasterKey)
+      // updateCurrentKey();
+      Method m = AbstractDelegationTokenSecretManager.class.getDeclaredMethod("updateCurrentKey");
+      m.setAccessible(true);
+      m.invoke(this);
+    } catch (Exception e) {
+      throw new IOException("Failed to initialize master key", e);
+    }
+    running = true;
+    tokenRemoverThread = new Daemon(new ExpiredTokenRemover());
+    tokenRemoverThread.start();
+  }
+
+  @Override
+  public synchronized void stopThreads() {
+    if (LOGGER.isDebugEnabled()) {
+      LOGGER.debug("Stopping expired delegation token remover thread");
+    }
+    running = false;
+    if (tokenRemoverThread != null) {
+      tokenRemoverThread.interrupt();
+    }
+  }
+
+  /**
+   * Remove expired tokens. Replaces logic in {@link AbstractDelegationTokenSecretManager}
+   * that cannot be reused due to private method access. Logic here can more efficiently
+   * deal with external token store by only loading into memory the minimum data needed.
+   */
+  protected void removeExpiredTokens() {
+    long now = System.currentTimeMillis();
+    Iterator<DelegationTokenIdentifier> i = tokenStore.getAllDelegationTokenIdentifiers()
+        .iterator();
+    while (i.hasNext()) {
+      DelegationTokenIdentifier id = i.next();
+      if (now > id.getMaxDate()) {
+        this.tokenStore.removeToken(id); // no need to look at token info
+      } else {
+        // get token info to check renew date
+        DelegationTokenInformation tokenInfo = tokenStore.getToken(id);
+        if (tokenInfo != null) {
+          if (now > tokenInfo.getRenewDate()) {
+            this.tokenStore.removeToken(id);
+          }
+        }
+      }
+    }
+  }
+
+  /**
+   * Extension of rollMasterKey to remove expired keys from store.
+   *
+   * @throws IOException
+   */
+  protected void rollMasterKeyExt() throws IOException {
+    Map<Integer, DelegationKey> keys = reloadKeys();
+    int currentKeyId = super.currentId;
+    HiveDelegationTokenSupport.rollMasterKey(TokenStoreDelegationTokenSecretManager.this);
+    List<DelegationKey> keysAfterRoll = Arrays.asList(getAllKeys());
+    for (DelegationKey key : keysAfterRoll) {
+      keys.remove(key.getKeyId());
+      if (key.getKeyId() == currentKeyId) {
+        tokenStore.updateMasterKey(currentKeyId, encodeWritable(key));
+      }
+    }
+    for (DelegationKey expiredKey : keys.values()) {
+      LOGGER.info("Removing expired key id={}", expiredKey.getKeyId());
+      try {
+        tokenStore.removeMasterKey(expiredKey.getKeyId());
+      } catch (Exception e) {
+        LOGGER.error("Error removing expired key id={}", expiredKey.getKeyId(), e);
+      }
+    }
+  }
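Aside (not part of the patch): the startThreads() override above has to reach a private Hadoop method via reflection. The mechanics, reduced to a self-contained toy where Base stands in for AbstractDelegationTokenSecretManager:

```java
import java.lang.reflect.Method;

public class PrivateMethodCall {
  static class Base {
    private void updateCurrentKey() { // stand-in for the private Hadoop method
      System.out.println("master key initialized");
    }
  }

  public static void main(String[] args) throws Exception {
    Method m = Base.class.getDeclaredMethod("updateCurrentKey");
    m.setAccessible(true); // may fail under a SecurityManager, hence the broad catch above
    m.invoke(new Base());
  }
}
```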
+  /**
+   * Cloned from {@link AbstractDelegationTokenSecretManager} to deal with private access
+   * restriction (there would not be a need to clone the remove thread if the remove logic was
+   * protected/extensible).
+   */
+  protected class ExpiredTokenRemover extends Thread {
+    private long lastMasterKeyUpdate;
+    private long lastTokenCacheCleanup;
+
+    @Override
+    public void run() {
+      LOGGER.info("Starting expired delegation token remover thread, "
+          + "tokenRemoverScanInterval=" + tokenRemoverScanInterval
+          / (60 * 1000) + " min(s)");
+      try {
+        while (running) {
+          long now = System.currentTimeMillis();
+          if (lastMasterKeyUpdate + keyUpdateInterval < now) {
+            try {
+              rollMasterKeyExt();
+              lastMasterKeyUpdate = now;
+            } catch (IOException e) {
+              LOGGER.error("Master key updating failed. "
+                  + StringUtils.stringifyException(e));
+            }
+          }
+          if (lastTokenCacheCleanup + tokenRemoverScanInterval < now) {
+            removeExpiredTokens();
+            lastTokenCacheCleanup = now;
+          }
+          try {
+            Thread.sleep(5000); // 5 seconds
+          } catch (InterruptedException ie) {
+            LOGGER.error("InterruptedException received for ExpiredTokenRemover thread " + ie);
+          }
+        }
+      } catch (Throwable t) {
+        LOGGER.error("ExpiredTokenRemover thread received unexpected exception. " + t, t);
+        Runtime.getRuntime().exit(-1);
+      }
+    }
+  }
+
+}
diff --git a/shims/common/src/main/java/org/apache/hadoop/hive/thrift/ZooKeeperTokenStore.java b/shims/common/src/main/java/org/apache/hadoop/hive/thrift/ZooKeeperTokenStore.java
new file mode 100644
index 0000000..e46f293
--- /dev/null
+++ b/shims/common/src/main/java/org/apache/hadoop/hive/thrift/ZooKeeperTokenStore.java
@@ -0,0 +1,472 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.thrift;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.curator.framework.CuratorFramework;
+import org.apache.curator.framework.CuratorFrameworkFactory;
+import org.apache.curator.framework.api.ACLProvider;
+import org.apache.curator.framework.imps.CuratorFrameworkState;
+import org.apache.curator.retry.ExponentialBackoffRetry;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.shims.ShimLoader;
+import org.apache.hadoop.hive.shims.Utils;
+import org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge.Server.ServerMode;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager.DelegationTokenInformation;
+import org.apache.hadoop.security.token.delegation.HiveDelegationTokenSupport;
+import org.apache.zookeeper.CreateMode;
+import org.apache.zookeeper.KeeperException;
+import org.apache.zookeeper.ZooDefs;
+import org.apache.zookeeper.ZooDefs.Ids;
+import org.apache.zookeeper.ZooDefs.Perms;
+import org.apache.zookeeper.data.ACL;
+import org.apache.zookeeper.data.Id;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * ZooKeeper token store implementation.
+ */
+public class ZooKeeperTokenStore implements DelegationTokenStore {
+
+  private static final Logger LOGGER =
+      LoggerFactory.getLogger(ZooKeeperTokenStore.class.getName());
+
+  protected static final String ZK_SEQ_FORMAT = "%010d";
+  private static final String NODE_KEYS = "/keys";
+  private static final String NODE_TOKENS = "/tokens";
+
+  private String rootNode = "";
+  private volatile CuratorFramework zkSession;
+  private String zkConnectString;
+  private int connectTimeoutMillis;
+  private List<ACL> newNodeAcl = Arrays.asList(new ACL(Perms.ALL, Ids.AUTH_IDS));
+
+  /**
+   * ACLProvider permissions will be used in case parent dirs need to be created
+   */
+  private final ACLProvider aclDefaultProvider = new ACLProvider() {
+
+    @Override
+    public List<ACL> getDefaultAcl() {
+      return newNodeAcl;
+    }
+
+    @Override
+    public List<ACL> getAclForPath(String path) {
+      return getDefaultAcl();
+    }
+  };
+
+  private ServerMode serverMode;
+
+  private final String WHEN_ZK_DSTORE_MSG = "when zookeeper based delegation token storage is enabled "
+      + "(hive.cluster.delegation.token.store.class=" + ZooKeeperTokenStore.class.getName() + ")";
+
+  private Configuration conf;
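Aside (not part of the patch): zkSession is declared volatile because getSession(), defined just below, lazily builds the Curator client with double-checked locking. The pattern in isolation, with ExpensiveClient as a hypothetical stand-in for CuratorFramework:

```java
public final class LazyClientHolder {
  static class ExpensiveClient {} // hypothetical stand-in for CuratorFramework

  private volatile ExpensiveClient client; // volatile makes the publication safe

  ExpensiveClient get() {
    ExpensiveClient c = client;   // first check without locking (fast path)
    if (c == null) {
      synchronized (this) {
        c = client;               // second check under the lock
        if (c == null) {
          client = c = new ExpensiveClient();
        }
      }
    }
    return c;
  }
}
```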
+  /**
+   * Default constructor for dynamic instantiation w/ Configurable
+   * (ReflectionUtils does not support Configuration constructor injection).
+   */
+  protected ZooKeeperTokenStore() {
+  }
+
+  private CuratorFramework getSession() {
+    if (zkSession == null || zkSession.getState() == CuratorFrameworkState.STOPPED) {
+      synchronized (this) {
+        if (zkSession == null || zkSession.getState() == CuratorFrameworkState.STOPPED) {
+          zkSession =
+              CuratorFrameworkFactory.builder().connectString(zkConnectString)
+                  .connectionTimeoutMs(connectTimeoutMillis).aclProvider(aclDefaultProvider)
+                  .retryPolicy(new ExponentialBackoffRetry(1000, 3)).build();
+          zkSession.start();
+        }
+      }
+    }
+    return zkSession;
+  }
+
+  private void setupJAASConfig(Configuration conf) throws IOException {
+    if (!UserGroupInformation.getLoginUser().isFromKeytab()) {
+      // The process has not logged in using keytab
+      // this should be a test mode, can't use keytab to authenticate
+      // with zookeeper.
+      LOGGER.warn("Login is not from keytab");
+      return;
+    }
+
+    String principal;
+    String keytab;
+    switch (serverMode) {
+    case METASTORE:
+      principal = getNonEmptyConfVar(conf, "hive.metastore.kerberos.principal");
+      keytab = getNonEmptyConfVar(conf, "hive.metastore.kerberos.keytab.file");
+      break;
+    case HIVESERVER2:
+      principal = getNonEmptyConfVar(conf, "hive.server2.authentication.kerberos.principal");
+      keytab = getNonEmptyConfVar(conf, "hive.server2.authentication.kerberos.keytab");
+      break;
+    default:
+      throw new AssertionError("Unexpected server mode " + serverMode);
+    }
+    Utils.setZookeeperClientKerberosJaasConfig(principal, keytab);
+  }
+
+  private String getNonEmptyConfVar(Configuration conf, String param) throws IOException {
+    String val = conf.get(param);
+    if (val == null || val.trim().isEmpty()) {
+      throw new IOException("Configuration parameter " + param + " should be set, "
+          + WHEN_ZK_DSTORE_MSG);
+    }
+    return val;
+  }
+
+  /**
+   * Create a path if it does not already exist ("mkdir -p")
+   * @param path string with '/' separator
+   * @param acl list of ACL entries
+   * @throws TokenStoreException
+   */
+  public void ensurePath(String path, List<ACL> acl)
+      throws TokenStoreException {
+    try {
+      CuratorFramework zk = getSession();
+      String node = zk.create().creatingParentsIfNeeded().withMode(CreateMode.PERSISTENT)
+          .withACL(acl).forPath(path);
+      LOGGER.info("Created path: {} ", node);
+    } catch (KeeperException.NodeExistsException e) {
+      // node already exists
+    } catch (Exception e) {
+      throw new TokenStoreException("Error creating path " + path, e);
+    }
+  }
+
+  /**
+   * Parse ACL permission string, from ZooKeeperMain private method
+   * @param permString
+   * @return
+   */
+  public static int getPermFromString(String permString) {
+    int perm = 0;
+    for (int i = 0; i < permString.length(); i++) {
+      switch (permString.charAt(i)) {
+      case 'r':
+        perm |= ZooDefs.Perms.READ;
+        break;
+      case 'w':
+        perm |= ZooDefs.Perms.WRITE;
+        break;
+      case 'c':
+        perm |= ZooDefs.Perms.CREATE;
+        break;
+      case 'd':
+        perm |= ZooDefs.Perms.DELETE;
+        break;
+      case 'a':
+        perm |= ZooDefs.Perms.ADMIN;
+        break;
+      default:
+        LOGGER.error("Unknown perm type: " + permString.charAt(i));
+      }
+    }
+    return perm;
+  }
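Aside (not part of the patch): getPermFromString simply ORs one ZooDefs.Perms bit per character, so the customary "cdrwa" suffix grants everything:

```java
import org.apache.zookeeper.ZooDefs;

public class PermStringDemo {
  public static void main(String[] args) {
    // what getPermFromString("cdrwa") computes
    int perm = ZooDefs.Perms.CREATE | ZooDefs.Perms.DELETE | ZooDefs.Perms.READ
        | ZooDefs.Perms.WRITE | ZooDefs.Perms.ADMIN;
    System.out.println(perm == ZooDefs.Perms.ALL); // true (both are 31)
  }
}
```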
+  /**
+   * Parse comma separated list of ACL entries to secure generated nodes, e.g.
+   * sasl:hive/host1@MY.DOMAIN:cdrwa,sasl:hive/host2@MY.DOMAIN:cdrwa
+   * @param aclString
+   * @return ACL list
+   */
+  public static List<ACL> parseACLs(String aclString) {
+    String[] aclComps = StringUtils.splitByWholeSeparator(aclString, ",");
+    List<ACL> acl = new ArrayList<ACL>(aclComps.length);
+    for (String a : aclComps) {
+      if (StringUtils.isBlank(a)) {
+        continue;
+      }
+      a = a.trim();
+      // from ZooKeeperMain private method
+      int firstColon = a.indexOf(':');
+      int lastColon = a.lastIndexOf(':');
+      if (firstColon == -1 || lastColon == -1 || firstColon == lastColon) {
+        LOGGER.error(a + " does not have the form scheme:id:perm");
+        continue;
+      }
+      ACL newAcl = new ACL();
+      newAcl.setId(new Id(a.substring(0, firstColon), a.substring(
+          firstColon + 1, lastColon)));
+      newAcl.setPerms(getPermFromString(a.substring(lastColon + 1)));
+      acl.add(newAcl);
+    }
+    return acl;
+  }
+
+  private void initClientAndPaths() {
+    if (this.zkSession != null) {
+      this.zkSession.close();
+    }
+    try {
+      ensurePath(rootNode + NODE_KEYS, newNodeAcl);
+      ensurePath(rootNode + NODE_TOKENS, newNodeAcl);
+    } catch (TokenStoreException e) {
+      throw e;
+    }
+  }
+
+  @Override
+  public void setConf(Configuration conf) {
+    if (conf == null) {
+      throw new IllegalArgumentException("conf is null");
+    }
+    this.conf = conf;
+  }
+
+  @Override
+  public Configuration getConf() {
+    return null; // not required
+  }
+
+  private Map<Integer, byte[]> getAllKeys() throws KeeperException, InterruptedException {
+
+    String masterKeyNode = rootNode + NODE_KEYS;
+
+    // get children of key node
+    List<String> nodes = zkGetChildren(masterKeyNode);
+
+    // read each child node, add to results
+    Map<Integer, byte[]> result = new HashMap<Integer, byte[]>();
+    for (String node : nodes) {
+      String nodePath = masterKeyNode + "/" + node;
+      byte[] data = zkGetData(nodePath);
+      if (data != null) {
+        result.put(getSeq(node), data);
+      }
+    }
+    return result;
+  }
+
+  private List<String> zkGetChildren(String path) {
+    CuratorFramework zk = getSession();
+    try {
+      return zk.getChildren().forPath(path);
+    } catch (Exception e) {
+      throw new TokenStoreException("Error getting children for " + path, e);
+    }
+  }
+
+  private byte[] zkGetData(String nodePath) {
+    CuratorFramework zk = getSession();
+    try {
+      return zk.getData().forPath(nodePath);
+    } catch (KeeperException.NoNodeException ex) {
+      return null;
+    } catch (Exception e) {
+      throw new TokenStoreException("Error reading " + nodePath, e);
+    }
+  }
+
+  private int getSeq(String path) {
+    String[] pathComps = path.split("/");
+    return Integer.parseInt(pathComps[pathComps.length - 1]);
+  }
+
+  @Override
+  public int addMasterKey(String s) {
+    String keysPath = rootNode + NODE_KEYS + "/";
+    CuratorFramework zk = getSession();
+    String newNode;
+    try {
+      newNode = zk.create().withMode(CreateMode.PERSISTENT_SEQUENTIAL).withACL(newNodeAcl)
+          .forPath(keysPath, s.getBytes());
+    } catch (Exception e) {
+      throw new TokenStoreException("Error creating new node with path " + keysPath, e);
+    }
+    LOGGER.info("Added key {}", newNode);
+    return getSeq(newNode);
+  }
+
+  @Override
+  public void updateMasterKey(int keySeq, String s) {
+    CuratorFramework zk = getSession();
+    String keyPath = rootNode + NODE_KEYS + "/" + String.format(ZK_SEQ_FORMAT, keySeq);
+    try {
+      zk.setData().forPath(keyPath, s.getBytes());
+    } catch (Exception e) {
+      throw new TokenStoreException("Error setting data in " + keyPath, e);
+    }
+  }
+
+  @Override
+  public boolean removeMasterKey(int keySeq) {
+    String keyPath = rootNode + NODE_KEYS + "/" + String.format(ZK_SEQ_FORMAT, keySeq);
+    zkDelete(keyPath);
+    return true;
+  }
+
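Aside (not part of the patch): addMasterKey and the ZK_SEQ_FORMAT pattern only line up because CreateMode.PERSISTENT_SEQUENTIAL appends a zero-padded ten-digit counter to the created znode, which "%010d" reproduces when updateMasterKey and removeMasterKey later address a key by id. A quick demonstration; the root path here is made up:

```java
public class SeqZnodeDemo {
  public static void main(String[] args) {
    // what ZooKeeper would create for sequence number 42 (hypothetical root path)
    String created = "/exampleRoot/keys/" + String.format("%010d", 42);
    System.out.println(created); // /exampleRoot/keys/0000000042

    // what getSeq(...) recovers from that path
    String[] comps = created.split("/");
    System.out.println(Integer.parseInt(comps[comps.length - 1])); // 42
  }
}
```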
+  private void zkDelete(String path) {
+    CuratorFramework zk = getSession();
+    try {
+      zk.delete().forPath(path);
+    } catch (KeeperException.NoNodeException ex) {
+      // already deleted
+    } catch (Exception e) {
+      throw new TokenStoreException("Error deleting " + path, e);
+    }
+  }
+
+  @Override
+  public String[] getMasterKeys() {
+    try {
+      Map<Integer, byte[]> allKeys = getAllKeys();
+      String[] result = new String[allKeys.size()];
+      int resultIdx = 0;
+      for (byte[] keyBytes : allKeys.values()) {
+        result[resultIdx++] = new String(keyBytes);
+      }
+      return result;
+    } catch (KeeperException ex) {
+      throw new TokenStoreException(ex);
+    } catch (InterruptedException ex) {
+      throw new TokenStoreException(ex);
+    }
+  }
+
+  private String getTokenPath(DelegationTokenIdentifier tokenIdentifier) {
+    try {
+      return rootNode + NODE_TOKENS + "/"
+          + TokenStoreDelegationTokenSecretManager.encodeWritable(tokenIdentifier);
+    } catch (IOException ex) {
+      throw new TokenStoreException("Failed to encode token identifier", ex);
+    }
+  }
+
+  @Override
+  public boolean addToken(DelegationTokenIdentifier tokenIdentifier,
+      DelegationTokenInformation token) {
+    byte[] tokenBytes = HiveDelegationTokenSupport.encodeDelegationTokenInformation(token);
+    String tokenPath = getTokenPath(tokenIdentifier);
+    CuratorFramework zk = getSession();
+    String newNode;
+    try {
+      newNode = zk.create().withMode(CreateMode.PERSISTENT).withACL(newNodeAcl)
+          .forPath(tokenPath, tokenBytes);
+    } catch (Exception e) {
+      throw new TokenStoreException("Error creating new node with path " + tokenPath, e);
+    }
+
+    LOGGER.info("Added token: {}", newNode);
+    return true;
+  }
+
+  @Override
+  public boolean removeToken(DelegationTokenIdentifier tokenIdentifier) {
+    String tokenPath = getTokenPath(tokenIdentifier);
+    zkDelete(tokenPath);
+    return true;
+  }
+
+  @Override
+  public DelegationTokenInformation getToken(DelegationTokenIdentifier tokenIdentifier) {
+    byte[] tokenBytes = zkGetData(getTokenPath(tokenIdentifier));
+    try {
+      return HiveDelegationTokenSupport.decodeDelegationTokenInformation(tokenBytes);
+    } catch (Exception ex) {
+      throw new TokenStoreException("Failed to decode token", ex);
+    }
+  }
+
+  @Override
+  public List<DelegationTokenIdentifier> getAllDelegationTokenIdentifiers() {
+    String containerNode = rootNode + NODE_TOKENS;
+    final List<String> nodes = zkGetChildren(containerNode);
+    List<DelegationTokenIdentifier> result = new java.util.ArrayList<DelegationTokenIdentifier>(
+        nodes.size());
+    for (String node : nodes) {
+      DelegationTokenIdentifier id = new DelegationTokenIdentifier();
+      try {
+        TokenStoreDelegationTokenSecretManager.decodeWritable(id, node);
+        result.add(id);
+      } catch (Exception e) {
+        LOGGER.warn("Failed to decode token '{}'", node);
+      }
+    }
+    return result;
+  }
+
+  @Override
+  public void close() throws IOException {
+    if (this.zkSession != null) {
+      this.zkSession.close();
+    }
+  }
+
+  @Override
+  public void init(Object objectStore, ServerMode smode) {
+    this.serverMode = smode;
+    zkConnectString =
+        conf.get(HadoopThriftAuthBridge.Server.DELEGATION_TOKEN_STORE_ZK_CONNECT_STR, null);
+    if (zkConnectString == null || zkConnectString.trim().isEmpty()) {
+      // try alternate config param
+      zkConnectString =
+          conf.get(
+              HadoopThriftAuthBridge.Server.DELEGATION_TOKEN_STORE_ZK_CONNECT_STR_ALTERNATE,
+              null);
+      if (zkConnectString == null || zkConnectString.trim().isEmpty()) {
+        throw new IllegalArgumentException("Zookeeper connect string has to be specified through "
+            + "either " + HadoopThriftAuthBridge.Server.DELEGATION_TOKEN_STORE_ZK_CONNECT_STR
+            + " or "
+            + HadoopThriftAuthBridge.Server.DELEGATION_TOKEN_STORE_ZK_CONNECT_STR_ALTERNATE
+            + WHEN_ZK_DSTORE_MSG);
+      }
+    }
+    connectTimeoutMillis =
+        conf.getInt(
+            HadoopThriftAuthBridge.Server.DELEGATION_TOKEN_STORE_ZK_CONNECT_TIMEOUTMILLIS,
+            CuratorFrameworkFactory.builder().getConnectionTimeoutMs());
+    String aclStr = conf.get(HadoopThriftAuthBridge.Server.DELEGATION_TOKEN_STORE_ZK_ACL, null);
+    if (StringUtils.isNotBlank(aclStr)) {
+      this.newNodeAcl = parseACLs(aclStr);
+    }
+    rootNode =
+        conf.get(HadoopThriftAuthBridge.Server.DELEGATION_TOKEN_STORE_ZK_ZNODE,
+            HadoopThriftAuthBridge.Server.DELEGATION_TOKEN_STORE_ZK_ZNODE_DEFAULT) + serverMode;
+
+    try {
+      // Install the JAAS Configuration for the runtime
+      setupJAASConfig(conf);
+    } catch (IOException e) {
+      throw new TokenStoreException("Error setting up JAAS configuration for zookeeper client "
+          + e.getMessage(), e);
+    }
+    initClientAndPaths();
+  }
+
+}
diff --git a/shims/common/src/main/java/org/apache/hadoop/hive/thrift/client/TUGIAssumingTransport.java b/shims/common/src/main/java/org/apache/hadoop/hive/thrift/client/TUGIAssumingTransport.java
new file mode 100644
index 0000000..fe18706
--- /dev/null
+++ b/shims/common/src/main/java/org/apache/hadoop/hive/thrift/client/TUGIAssumingTransport.java
@@ -0,0 +1,74 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.thrift.client;
+
+import java.io.IOException;
+import java.security.PrivilegedExceptionAction;
+
+import org.apache.hadoop.hive.thrift.TFilterTransport;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.thrift.transport.TTransport;
+import org.apache.thrift.transport.TTransportException;
+
+/**
+ * The Thrift SASL transports call Sasl.createSaslServer and Sasl.createSaslClient
+ * inside open(). So, we need to assume the correct UGI when the transport is opened
+ * so that the SASL mechanisms have access to the right principal. This transport
+ * wraps the Sasl transports to set up the right UGI context for open().
+ *
+ * This is used on the client side, where the API explicitly opens a transport to
+ * the server.
+ */
+public class TUGIAssumingTransport extends TFilterTransport {
+  protected UserGroupInformation ugi;
+
+  public TUGIAssumingTransport(TTransport wrapped, UserGroupInformation ugi) {
+    super(wrapped);
+    this.ugi = ugi;
+  }
+
+  @Override
+  public void open() throws TTransportException {
+    try {
+      ugi.doAs(new PrivilegedExceptionAction<Void>() {
+        public Void run() {
+          try {
+            wrapped.open();
+          } catch (TTransportException tte) {
+            // Wrap the transport exception in an RTE, since UGI.doAs() then goes
+            // and unwraps this for us out of the doAs block. We then unwrap one
+            // more time in our catch clause to get back the TTE. (ugh)
+            throw new RuntimeException(tte);
+          }
+          return null;
+        }
+      });
+    } catch (IOException ioe) {
+      throw new RuntimeException("Received an ioe we never threw!", ioe);
+    } catch (InterruptedException ie) {
+      throw new RuntimeException("Received an ie we never threw!", ie);
+    } catch (RuntimeException rte) {
+      if (rte.getCause() instanceof TTransportException) {
+        throw (TTransportException) rte.getCause();
+      } else {
+        throw rte;
+      }
+    }
+  }
+}
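Aside (not part of the patch): the open() override above tunnels a checked TTransportException through an action that is not allowed to throw it, by wrapping it in a RuntimeException and unwrapping on the other side. A dependency-free sketch of the same trick; doAs and CheckedFailure are stand-ins:

```java
public class ExceptionTunnelDemo {
  static class CheckedFailure extends Exception {}

  // stand-in for UGI.doAs: runs an action whose run() cannot throw checked exceptions
  static void doAs(Runnable action) {
    action.run();
  }

  static void open() throws CheckedFailure {
    try {
      doAs(new Runnable() {
        public void run() {
          try {
            throw new CheckedFailure();     // the real code calls wrapped.open() here
          } catch (CheckedFailure cf) {
            throw new RuntimeException(cf); // tunnel the checked exception out
          }
        }
      });
    } catch (RuntimeException rte) {
      if (rte.getCause() instanceof CheckedFailure) {
        throw (CheckedFailure) rte.getCause(); // unwrap back to the original type
      }
      throw rte;
    }
  }

  public static void main(String[] args) {
    try {
      open();
    } catch (CheckedFailure expected) {
      System.out.println("original checked exception recovered");
    }
  }
}
```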
diff --git a/shims/common/src/main/java/org/apache/hadoop/security/token/delegation/HiveDelegationTokenSupport.java b/shims/common/src/main/java/org/apache/hadoop/security/token/delegation/HiveDelegationTokenSupport.java
new file mode 100644
index 0000000..6b39a14
--- /dev/null
+++ b/shims/common/src/main/java/org/apache/hadoop/security/token/delegation/HiveDelegationTokenSupport.java
@@ -0,0 +1,68 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.security.token.delegation;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+
+import org.apache.hadoop.io.WritableUtils;
+import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager.DelegationTokenInformation;
+
+/**
+ * Workaround for serialization of {@link DelegationTokenInformation} through package access.
+ * Future version of Hadoop should add this to DelegationTokenInformation itself.
+ */
+public final class HiveDelegationTokenSupport {
+
+  private HiveDelegationTokenSupport() {}
+
+  public static byte[] encodeDelegationTokenInformation(DelegationTokenInformation token) {
+    try {
+      ByteArrayOutputStream bos = new ByteArrayOutputStream();
+      DataOutputStream out = new DataOutputStream(bos);
+      WritableUtils.writeVInt(out, token.password.length);
+      out.write(token.password);
+      out.writeLong(token.renewDate);
+      out.flush();
+      return bos.toByteArray();
+    } catch (IOException ex) {
+      throw new RuntimeException("Failed to encode token.", ex);
+    }
+  }
+
+  public static DelegationTokenInformation decodeDelegationTokenInformation(byte[] tokenBytes)
+      throws IOException {
+    DataInputStream in = new DataInputStream(new ByteArrayInputStream(tokenBytes));
+    DelegationTokenInformation token = new DelegationTokenInformation(0, null);
+    int len = WritableUtils.readVInt(in);
+    token.password = new byte[len];
+    in.readFully(token.password);
+    token.renewDate = in.readLong();
+    return token;
+  }
+
+  public static void rollMasterKey(
+      AbstractDelegationTokenSecretManager<? extends AbstractDelegationTokenIdentifier> mgr)
+      throws IOException {
+    mgr.rollMasterKey();
+  }
+
+}
diff --git a/shims/pom.xml b/shims/pom.xml
index d43086f..679b7c0 100644
--- a/shims/pom.xml
+++ b/shims/pom.xml
@@ -33,8 +33,6 @@
     <module>common</module>
-    <module>0.20</module>
-    <module>common-secure</module>
     <module>0.20S</module>
     <module>0.23</module>
     <module>scheduler</module>
diff --git a/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ExecutionPhase.java b/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ExecutionPhase.java
index 6498682..3026ea0 100644
--- a/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ExecutionPhase.java
+++ b/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ExecutionPhase.java
@@ -107,6 +107,10 @@ public void execute() throws Throwable {
           JUnitReportParser parser = new JUnitReportParser(logger, batchLogDir);
           executedTests.addAll(parser.getExecutedTests());
           failedTests.addAll(parser.getFailedTests());
+          // if the TEST*.xml was not generated or was corrupt, let someone know
+          if (parser.getNumAttemptedTests() == 0) {
+            failedTests.add(batch.getName() + " - did not produce a TEST-*.xml file");
+          }
         }
       } finally {
         long elapsed = System.currentTimeMillis() - start;
diff --git a/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/HostExecutor.java b/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/HostExecutor.java
index 09147f4..b05d2c2 100644
--- a/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/HostExecutor.java
+++ b/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/HostExecutor.java
@@ -29,7 +29,6 @@
 import java.util.concurrent.CopyOnWriteArrayList;
 import java.util.concurrent.TimeUnit;
 
-import org.apache.commons.io.FileUtils;
 import org.apache.hive.ptest.execution.conf.Host;
 import org.apache.hive.ptest.execution.conf.TestBatch;
 import org.apache.hive.ptest.execution.ssh.RSyncCommand;
@@ -218,7 +217,7 @@ private boolean executeTestBatch(Drone drone, TestBatch batch, Set fa
     script.delete();
     mLogger.info(drone + " executing " + batch + " with " + command);
     RemoteCommandResult sshResult = new SSHCommand(mSSHCommandExecutor, drone.getPrivateKey(), drone.getUser(),
-        drone.getHost(), drone.getInstance(), command).
+        drone.getHost(), drone.getInstance(), command, true).
         call();
     File batchLogDir = null;
     if(sshResult.getExitCode() == Constants.EXIT_CODE_UNKNOWN) {
@@ -346,8 +345,22 @@ RSyncResult copyFromDroneToLocal(Drone drone, String localFile, String remoteFil
    * exits with a status code of 255 until all drones have been utilized, possibly
    * excluding the host from future use.
    */
+  ListenableFuture<SSHResult> execIgnoreAllErrors(final String cmd)
+      throws Exception {
+    return exec(cmd, false);
+  }
+  /**
+   * Execute command on at least one drone. The method will retry when the command
+   * exits with a status code of 255 until all drones have been utilized, possibly
+   * excluding the host from future use.
+   */
   ListenableFuture<SSHResult> exec(final String cmd)
       throws Exception {
+    return exec(cmd, true);
+  }
+
+  private ListenableFuture<SSHResult> exec(final String cmd, final boolean reportErrors)
+      throws Exception {
     return mExecutor.submit(new Callable<SSHResult>() {
       @Override
       public SSHResult call() throws Exception {
@@ -357,8 +370,8 @@ public SSHResult call() throws Exception {
         templateVariables.put("localDir", drone.getLocalDirectory());
         String command = Templates.getTemplateResult(cmd, templateVariables);
         SSHResult result = new SSHCommand(mSSHCommandExecutor, drone.getPrivateKey(), drone.getUser(),
-            drone.getHost(), drone.getInstance(), command).call();
-        if(result.getExitCode() == Constants.EXIT_CODE_UNKNOWN) {
+            drone.getHost(), drone.getInstance(), command, reportErrors).call();
+        if(reportErrors && result.getExitCode() == Constants.EXIT_CODE_UNKNOWN) {
           mDrones.remove(drone); // return value not checked due to concurrent access
           mLogger.error("Aborting drone during exec " + command,
               new AbortDroneException("Drone " + drone + " exited with "
@@ -388,7 +401,7 @@ public RemoteCommandResult call() throws Exception {
         templateVariables.put("localDir", drone.getLocalDirectory());
         String command = Templates.getTemplateResult(cmd, templateVariables);
         SSHResult result = new SSHCommand(mSSHCommandExecutor, drone.getPrivateKey(), drone.getUser(),
-            drone.getHost(), drone.getInstance(), command).call();
+            drone.getHost(), drone.getInstance(), command, true).call();
         if(result.getExitCode() != Constants.EXIT_CODE_SUCCESS) {
           mDrones.remove(drone); // return value not checked due to concurrent access
           mLogger.error("Aborting drone during exec " + command,
diff --git a/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/JUnitReportParser.java b/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/JUnitReportParser.java
index 0cc816c..cc9f617 100644
--- a/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/JUnitReportParser.java
+++ b/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/JUnitReportParser.java
@@ -76,6 +76,9 @@ public JUnitReportParser(Logger logger, File directory) throws Exception {
     }
     return failedTests;
   }
+  public int getNumAttemptedTests() {
+    return getExecutedTests().size() + getFailedTests().size();
+  }
   private void parse() {
     for(File file : getFiles(directory)) {
       FileInputStream stream = null;
diff --git a/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/Phase.java b/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/Phase.java
index fa2d8e7..47633ec 100644
--- a/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/Phase.java
+++ b/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/Phase.java
@@ -88,6 +88,14 @@ protected void execLocally(String command)
     }
     return toListOfResults(futures);
   }
+  protected List<SSHResult> execHostsIgnoreErrors(String command)
+      throws Exception {
+    List<ListenableFuture<SSHResult>> futures = Lists.newArrayList();
+    for(HostExecutor hostExecutor : hostExecutors) {
+      futures.add(hostExecutor.execIgnoreAllErrors(command));
+    }
+    return toListOfResults(futures, false);
+  }
   // clean prep
   protected List<RemoteCommandResult> execInstances(String command)
       throws Exception {
@@ -159,10 +167,15 @@ protected void execLocally(String command)
   }
   private <T extends RemoteCommandResult> List<T> toListOfResults(List<ListenableFuture<T>> futures)
       throws Exception {
+    return toListOfResults(futures, true);
+  }
+  private <T extends RemoteCommandResult> List<T> toListOfResults(List<ListenableFuture<T>> futures,
+      boolean reportErrors)
+      throws Exception {
     List<T> results = Lists.newArrayList();
     for(T result : Futures.allAsList(futures).get()) {
       if(result != null) {
-        if(result.getException() != null || result.getExitCode() != 0) {
+        if(reportErrors && (result.getException() != null || result.getExitCode() != 0)) {
           throw new SSHExecutionException(result);
         }
         results.add(result);
diff --git a/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/PrepPhase.java b/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/PrepPhase.java
index 02d7b6c..825f0c0 100644
--- a/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/PrepPhase.java
+++ b/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/PrepPhase.java
@@ -65,6 +65,8 @@ public void execute() throws Exception {
     Templates.writeTemplateResult("source-prep.vm", sourcePrepScript, getTemplateDefaults());
     execLocally("bash " + sourcePrepScript.getPath());
     logger.debug("Deleting " + sourcePrepScript + ": " + sourcePrepScript.delete());
+    execHostsIgnoreErrors("pkill -f java");
+    execHostsIgnoreErrors("pkill -9 -f java");
     elapsedTime = TimeUnit.MINUTES.convert((System.currentTimeMillis() - start),
         TimeUnit.MILLISECONDS);
     logger.info("PERF: source prep took " + elapsedTime + " minutes");
diff --git a/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/context/CloudExecutionContextProvider.java b/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/context/CloudExecutionContextProvider.java
index 9ef9cd6..e7a5752 100644
--- a/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/context/CloudExecutionContextProvider.java
+++ b/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/context/CloudExecutionContextProvider.java
@@ -286,7 +286,7 @@ private static String publicIp(NodeMetadata node) {
       @Override
       public void run() {
         String ip = publicIpOrHostname(node);
-        SSHCommand command = new SSHCommand(mSSHCommandExecutor, mPrivateKey, mUser, ip, 0, "pkill -f java");
+        SSHCommand command = new SSHCommand(mSSHCommandExecutor, mPrivateKey, mUser, ip, 0, "pkill -f java", true);
         mSSHCommandExecutor.execute(command);
         if(command.getExitCode() == Constants.EXIT_CODE_UNKNOWN ||
             command.getException() != null) {
diff --git a/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ssh/SSHCommand.java b/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ssh/SSHCommand.java
index d027e44..47c2d91 100644
--- a/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ssh/SSHCommand.java
+++ b/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ssh/SSHCommand.java
@@ -22,12 +22,14 @@
 
   private final SSHCommandExecutor executor;
   private final String command;
+  private final boolean reportErrors;
 
   public SSHCommand(SSHCommandExecutor executor, String privateKey,
-      String user, String host, int instance, String command) {
+      String user, String host, int instance, String command, boolean reportErrors) {
     super(privateKey, user, host, instance);
     this.executor = executor;
     this.command = command;
+    this.reportErrors = reportErrors;
   }
 
   @Override
@@ -37,6 +39,10 @@ public SSHResult call() {
         getExitCode(), getException(), getOutput());
   }
 
+  public boolean isReportErrors() {
+    return reportErrors;
+  }
+
   public String getCommand() {
     return command;
   }
diff --git a/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ssh/SSHCommandExecutor.java b/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ssh/SSHCommandExecutor.java
index 8c9f707..e335b55 100644
--- a/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ssh/SSHCommandExecutor.java
+++ b/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ssh/SSHCommandExecutor.java
@@ -63,7 +63,7 @@ public void execute(SSHCommand command) {
         command.setExitCode(Constants.EXIT_CODE_UNKNOWN);
         return;
       }
-      if(attempts++ <= 3 && cmd.getExitCode() == Constants.EXIT_CODE_UNKNOWN) {
+      if(command.isReportErrors() && attempts++ <= 3 && cmd.getExitCode() == Constants.EXIT_CODE_UNKNOWN) {
        mLogger.warn("Command exited with " + cmd.getExitCode() + ", will retry: " + command);
        retry = true;
        TimeUnit.SECONDS.sleep(5);
diff --git a/testutils/ptest2/src/main/resources/batch-exec.vm b/testutils/ptest2/src/main/resources/batch-exec.vm
index 6289785..da3e0ac 100644
--- a/testutils/ptest2/src/main/resources/batch-exec.vm
+++ b/testutils/ptest2/src/main/resources/batch-exec.vm
@@ -86,7 +86,7 @@ fi
 echo $pid >> batch.pid
 wait $pid
 ret=$?
-find ./ -type f -name hive.log | \
+find ./ -type f -name hive.log -o -name spark.log | \
   xargs -I {} sh -c 'f=$(basename {}); test -f ${logDir}/$f && f=$f-$(uuidgen); mv {} ${logDir}/$f'
 find ./ -type f -name 'TEST-*.xml' | \
   xargs -I {} sh -c 'f=TEST-${batchName}-$(basename {}); test -f ${logDir}/$f && f=$f-$(uuidgen); mv {} ${logDir}/$f'
diff --git a/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestPrepPhase.testExecute.approved.txt b/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestPrepPhase.testExecute.approved.txt
index 7d35d84..4dc7fca 100644
--- a/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestPrepPhase.testExecute.approved.txt
+++ b/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestPrepPhase.testExecute.approved.txt
@@ -1,3 +1,5 @@
 bash /tmp/hive-ptest-units/TestPrepPhase/source-prep.sh
 mkdir -p /some/working/dir/scratch
+pkill -9 -f java
+pkill -f java
 rm -rf /some/working/dir/scratch
\ No newline at end of file
diff --git a/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/ssh/TestSSHCommandExecutor.java b/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/ssh/TestSSHCommandExecutor.java
index f81535f..9499797 100644
--- a/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/ssh/TestSSHCommandExecutor.java
+++ b/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/ssh/TestSSHCommandExecutor.java
@@ -54,7 +54,7 @@ public void testShutdownBeforeWaitFor() throws Exception {
         "-o StrictHostKeyChecking=no");
     Assert.assertFalse(executor.isShutdown());
     executor.shutdownNow();
-    SSHCommand command = new SSHCommand(executor, "privateKey", "user", "host", 1, "whoami");
+    SSHCommand command = new SSHCommand(executor, "privateKey", "user", "host", 1, "whoami", true);
     executor.execute(command);
     Assert.assertTrue(executor.isShutdown());
     Assert.assertEquals(Constants.EXIT_CODE_UNKNOWN, command.getExitCode());
@@ -77,7 +77,7 @@ public Integer answer(InvocationOnMock invocation) throws Throwable {
         return Constants.EXIT_CODE_UNKNOWN;
       }
     });
-    SSHCommand command = new SSHCommand(executor, "privateKey", "user", "host", 1, "whoami");
+    SSHCommand command = new SSHCommand(executor, "privateKey", "user", "host", 1, "whoami", true);
     executor.execute(command);
     Assert.assertTrue(executor.isShutdown());
     Assert.assertEquals(Constants.EXIT_CODE_UNKNOWN, command.getExitCode());